Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -43,6 +43,7 @@ struct isl_basic_map; struct isl_id; struct isl_set; +struct isl_pw_aff; struct isl_union_set; struct isl_union_map; struct isl_space; @@ -656,6 +657,12 @@ /// @param NewDomain The new statement domain. void restrictDomain(__isl_take isl_set *NewDomain); + /// @brief Compute the isl representation for @p S. + /// + /// This will compute the isl representation for @p S and also restrict the + /// context of the SCoP accordingly if a computation in @p S could wrap. + __isl_give isl_pw_aff *getPwAff(const SCEV *S); + /// @brief Get the loop for a dimension. /// /// @param Dimension The dimension of the induction variable Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -79,19 +79,33 @@ public: /// @brief Translate a 'const SCEV *' to an isl_pw_aff. /// - /// @param Stmt The location at which the scalar evolution expression - /// is evaluated. - /// @param Expr The expression that is translated. - static __isl_give isl_pw_aff *getPwAff(ScopStmt *Stmt, const SCEV *Expr); + /// @param Stmt The location at which the scalar evolution expression + /// is evaluated. + /// @param Expr The expression that is translated. + /// @param useModulo Flag to indicate that modulo semantic should be used. + static __isl_give isl_pw_aff *getPwAff(ScopStmt *Stmt, const SCEV *Expr, + bool useModulo = false); private: isl_ctx *Ctx; int NbLoopSpaces; const Scop *S; - SCEVAffinator(const ScopStmt *Stmt); + /// @brief Flag to inidicate that modulo semantic should be used. + const bool useModulo; + + SCEVAffinator(const ScopStmt *Stmt, bool useModulo); int getLoopDepth(const Loop *L); + /// @brief Compute the non-wrapping version of @p PWA for the type of @p Expr. + /// + /// @param PWA The piece-wise affine function that might wrap. + /// @param Expr The SCEV that was translated to @p PWA. + /// @param Flags The nsw/nuw flags of the operation. + __isl_give isl_pw_aff *computeNonWrap(__isl_take isl_pw_aff *PWA, + const SCEV *Expr, + SCEV::NoWrapFlags Flags); + __isl_give isl_pw_aff *visit(const SCEV *Expr); __isl_give isl_pw_aff *visitConstant(const SCEVConstant *Expr); __isl_give isl_pw_aff *visitTruncateExpr(const SCEVTruncateExpr *Expr); @@ -109,21 +123,51 @@ friend struct SCEVVisitor; }; -SCEVAffinator::SCEVAffinator(const ScopStmt *Stmt) +SCEVAffinator::SCEVAffinator(const ScopStmt *Stmt, bool useModulo) : Ctx(Stmt->getIslCtx()), NbLoopSpaces(Stmt->getNumIterators()), - S(Stmt->getParent()) {} + S(Stmt->getParent()), useModulo(useModulo) {} -__isl_give isl_pw_aff *SCEVAffinator::getPwAff(ScopStmt *Stmt, - const SCEV *Scev) { +__isl_give isl_pw_aff *SCEVAffinator::getPwAff(ScopStmt *Stmt, const SCEV *Scev, + bool useModulo) { Scop *S = Stmt->getParent(); const Region *Reg = &S->getRegion(); S->addParams(getParamsInAffineExpr(Reg, Scev, *S->getSE())); - SCEVAffinator Affinator(Stmt); + SCEVAffinator Affinator(Stmt, useModulo); return Affinator.visit(Scev); } +__isl_give isl_pw_aff *SCEVAffinator::computeNonWrap(isl_pw_aff *PWA, + const SCEV *Expr, + SCEV::NoWrapFlags Flags) { + Expr->dump(); + if (Flags & SCEV::FlagNSW) + return PWA; + errs() << " No nsw!\n"; + + assert(Expr->getType()->isIntegerTy() && "SCEV did not have integer type"); + + unsigned Width = Expr->getType()->getIntegerBitWidth(); + isl_ctx *Ctx = isl_pw_aff_get_ctx(PWA); + + isl_set *Domain = isl_pw_aff_domain(isl_pw_aff_copy(PWA)); + + isl_val *ModVal = isl_val_int_from_ui(Ctx, Width); + ModVal = isl_val_2exp(ModVal); + + isl_val *AddVal = isl_val_int_from_ui(Ctx, Width - 1); + AddVal = isl_val_2exp(AddVal); + + isl_pw_aff *AddPW = isl_pw_aff_val_on_domain(Domain, AddVal); + + PWA = isl_pw_aff_add(PWA, isl_pw_aff_copy(AddPW)); + PWA = isl_pw_aff_mod_val(PWA, ModVal); + PWA = isl_pw_aff_sub(PWA, AddPW); + + return PWA; +} + __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) { // In case the scev is a valid parameter, we do not further analyze this // expression, but create a new parameter in the isl_pw_aff. This allows us @@ -189,7 +233,8 @@ Sum = isl_pw_aff_add(Sum, NextSummand); } - // TODO: Check for NSW and NUW. + if (useModulo) + Sum = computeNonWrap(Sum, Expr, Expr->getNoWrapFlags()); return Sum; } @@ -202,8 +247,13 @@ // only of parameters and we will stop in the visit(const SCEV *) function and // return the isl_pw_aff for that parameter. auto ConstantAndLeftOverPair = extractConstantFactor(Expr, *S->getSE()); - return isl_pw_aff_mul(visit(ConstantAndLeftOverPair.first), - visit(ConstantAndLeftOverPair.second)); + isl_pw_aff *MulPWA = isl_pw_aff_mul(visit(ConstantAndLeftOverPair.first), + visit(ConstantAndLeftOverPair.second)); + + if (useModulo) + MulPWA = computeNonWrap(MulPWA, Expr, Expr->getNoWrapFlags()); + + return MulPWA; } __isl_give isl_pw_aff *SCEVAffinator::visitUDivExpr(const SCEVUDivExpr *Expr) { @@ -214,37 +264,26 @@ SCEVAffinator::visitAddRecExpr(const SCEVAddRecExpr *Expr) { assert(Expr->isAffine() && "Only affine AddRecurrences allowed"); - // Directly generate isl_pw_aff for Expr if 'start' is zero. - if (Expr->getStart()->isZero()) { - assert(S->getRegion().contains(Expr->getLoop()) && - "Scop does not contain the loop referenced in this AddRec"); + assert(S->getRegion().contains(Expr->getLoop()) && + "Scop does not contain the loop referenced in this AddRec"); - isl_pw_aff *Start = visit(Expr->getStart()); - isl_pw_aff *Step = visit(Expr->getOperand(1)); - isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces); - isl_local_space *LocalSpace = isl_local_space_from_space(Space); + isl_pw_aff *Start = visit(Expr->getStart()); + isl_pw_aff *Step = visit(Expr->getOperand(1)); + isl_space *Space = isl_space_set_alloc(Ctx, 0, NbLoopSpaces); + isl_local_space *LocalSpace = isl_local_space_from_space(Space); - int loopDimension = getLoopDepth(Expr->getLoop()); + int loopDimension = getLoopDepth(Expr->getLoop()); - isl_aff *LAff = isl_aff_set_coefficient_si( - isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); - isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); + isl_aff *LAff = isl_aff_set_coefficient_si(isl_aff_zero_on_domain(LocalSpace), + isl_dim_in, loopDimension, 1); + isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); - // TODO: Do we need to check for NSW and NUW? - return isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff)); - } + isl_pw_aff *AddRecPWA = isl_pw_aff_add(Start, isl_pw_aff_mul(Step, LPwAff)); - // Translate AddRecExpr from '{start, +, inc}' into 'start + {0, +, inc}' - // if 'start' is not zero. - ScalarEvolution &SE = *S->getSE(); - const SCEV *ZeroStartExpr = SE.getAddRecExpr( - SE.getConstant(Expr->getStart()->getType(), 0), - Expr->getStepRecurrence(SE), Expr->getLoop(), SCEV::FlagAnyWrap); + if (useModulo) + AddRecPWA = computeNonWrap(AddRecPWA, Expr, Expr->getNoWrapFlags()); - isl_pw_aff *ZeroStartResult = visit(ZeroStartExpr); - isl_pw_aff *Start = visit(Expr->getStart()); - - return isl_pw_aff_add(ZeroStartResult, Start); + return AddRecPWA; } __isl_give isl_pw_aff *SCEVAffinator::visitSMaxExpr(const SCEVSMaxExpr *Expr) { @@ -306,7 +345,7 @@ isl_val *V; isl_ctx *ctx = isl_set_get_ctx(S); - bool isWrapping = Range.isSignWrappedSet(); + bool isWrapping = Range.isSignWrappedSet() && !Range.isFullSet(); const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin(); V = isl_valFromAPInt(ctx, LB, true); isl_set *SLB = isl_set_lower_bound_val(isl_set_copy(S), type, dim, V); @@ -647,8 +686,7 @@ AccessRelation = isl_map_universe(Space); for (int i = 0, Size = Access.Subscripts.size(); i < Size; ++i) { - isl_pw_aff *Affine = - SCEVAffinator::getPwAff(Statement, Access.Subscripts[i]); + isl_pw_aff *Affine = Statement->getPwAff(Access.Subscripts[i]); if (Size == 1) { // For the non delinearized arrays, divide the access function of the last @@ -820,6 +858,24 @@ isl_map *ScopStmt::getScattering() const { return isl_map_copy(Scattering); } +__isl_give isl_pw_aff *ScopStmt::getPwAff(const SCEV *S) { + isl_pw_aff *Expr = SCEVAffinator::getPwAff(this, S, false); + isl_pw_aff *ModExpr = SCEVAffinator::getPwAff(this, S, true); + + isl_set *Domain = isl_set_reset_tuple_id(getDomain()); + isl_pw_aff *DomExpr = + isl_pw_aff_intersect_domain(isl_pw_aff_copy(Expr), isl_set_copy(Domain)); + isl_pw_aff *DomModExpr = isl_pw_aff_intersect_domain(ModExpr, Domain); + + isl_set *NonWrappingDom = isl_pw_aff_ne_set(DomExpr, DomModExpr); + NonWrappingDom = isl_set_params(NonWrappingDom); + NonWrappingDom = isl_set_complement(NonWrappingDom); + + getParent()->addAssumption(NonWrappingDom); + + return Expr; +} + void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) { assert(isl_set_is_subset(NewDomain, Domain) && "New domain is not a subset of old domain!"); @@ -1341,10 +1397,6 @@ ConstantRange SRange = SE->getSignedRange(ParamID.first); - // TODO: Find a case where the full set is actually helpful. - if (SRange.isFullSet()) - continue; - Context = addRangeBoundsToSet(Context, SRange, dim, isl_dim_param); } } Index: test/ScopInfo/assume_gep_bounds.ll =================================================================== --- test/ScopInfo/assume_gep_bounds.ll +++ test/ScopInfo/assume_gep_bounds.ll @@ -16,7 +16,7 @@ ; values for which our assumption holds. ; CHECK: Assumed Context -; CHECK-NEXT: [n, m, p] -> { : p <= 30 and m <= 20 } +; CHECK-NEXT: [n, m, p] -> { : p <= 30 and m <= 20 and p <= 2305843009213694582 - 600n - 30m } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/simple_loop_1.ll =================================================================== --- test/ScopInfo/simple_loop_1.ll +++ test/ScopInfo/simple_loop_1.ll @@ -15,7 +15,7 @@ bb: ; preds = %bb, %entry %i = phi i64 [ 0, %entry ], [ %i.inc, %bb ] - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep %i.inc = add nsw i64 %i, 1 %exitcond = icmp eq i64 %i.inc, %N Index: test/ScopInfo/unsigned-condition.ll =================================================================== --- test/ScopInfo/unsigned-condition.ll +++ test/ScopInfo/unsigned-condition.ll @@ -20,7 +20,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/wraping_signed_expr_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_1.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, char N, char p) { +; for (char i = 0; i < N; i++) +; A[i + 1] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : N <= 2305843009213693951 } +; +; CHECK: Function: nowrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr i32, i32* %A, i64 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i32* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr inbounds i32, i32* %A, i64 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_1_long.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_1_long.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(long *A, char N, char p) { +; for (char i = 0; i < N; i++) +; A[i + 1] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : N <= 1152921504606846975 } +; +; CHECK: Function: nowrap +; CHECK: Assumed Context: +; CHECK: [N] -> { : } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr inbounds i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_2.ll @@ -0,0 +1,40 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + 30] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N] -> { : N <= 2147483647 and N >= -2147483648 } +; CHECK: Assumed Context: +; CHECK: [N] -> { : N <= 2147483618 } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw i32 %indvars.iv, 30 + %tmp6 = getelementptr i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_3.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_3.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + p] = 0; +; } +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N, p] -> { : p <= 2147483648 - N } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw i32 %indvars.iv, %p + %tmp6 = getelementptr inbounds i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_4.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_4.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[p-1] = 0; +; } +; +; CHECK: Function: wrap +; CHECK: Assumed Context: +; CHECK: [N, p] -> { : +; CHECK-DAG: p >= -1152921504606846975 +; CHECK-DAG: p <= 1152921504606846976 +; CHECK: } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i64 %p, -1 + %tmp6 = getelementptr i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_5.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_5.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -analyze < %s | FileCheck %s +; +; FIXME: Edit the run line and add checks! +; +; XFAIL: * +; +; void wraps(int *A, int p, short q, char r1, char r2) { +; for (char i = r1; i < r2; i++) +; A[p + q] = A[(int)r1 + (int)r2]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wraps(i32* %A, i32 %p, i16 signext %q, i8 signext %r1, i8 signext %r2) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i8 [ %r1, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i8 %i.0, %r2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv3 = sext i8 %r1 to i64 + %conv4 = sext i8 %r2 to i64 + %add = add nsw i64 %conv3, %conv4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add + %tmp = load i32, i32* %arrayidx, align 4 + %conv5 = sext i16 %q to i32 + %add6 = add nsw i32 %conv5, %p + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %idxprom7 + store i32 %tmp, i32* %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i8 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}