Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -814,6 +814,17 @@ /// this scop and that need to be code generated as a run-time test. isl_set *AssumedContext; + /// @brief The boundary assumptions under which this scop was built. + /// + /// The boundary context is similar to the assumed context as it contains + /// constraints over the parameters we assume to be true. However, the + /// boundary context is less useful for dependence analysis and + /// simplification purposes as it contains only constraints that affect the + /// boundaries of the parameter ranges. As these constraints can become quite + /// complex, the boundary context and the assumed context are separated as a + /// meassure to save compile time. + isl_set *BoundaryContext; + /// @brief The schedule of the SCoP /// /// The schedule of the SCoP describes the execution order of the statements @@ -894,8 +905,12 @@ /// @brief Add the bounds of the parameters to the context. void addParameterBounds(); - /// @brief Simplify the assumed context. - void simplifyAssumedContext(); + /// @brief Simplify an assumption context with the context and the domain. + __isl_give isl_set * + simplifyAssumptionContext(__isl_take isl_set *AssumptionContext) const; + + /// @brief Simplify the assumed and boundary context. + void simplifyContexts(); /// @brief Create a new SCoP statement for either @p BB or @p R. /// @@ -1049,6 +1064,21 @@ /// to hold. void addAssumption(__isl_take isl_set *Set); + /// @brief Get the boundary context for this Scop. + /// + /// @return The boundary context of this Scop. + __isl_give isl_set *getBoundaryContext() const; + + /// @brief Get the runtime context for this Scop. + /// + /// The runtime context is the intersection of the assumed and the boundary + /// context. It is used in the AST generation to build the runtime checks that + /// are used to decide if the optimized or original version of the SCoP will + /// be exectued. + /// + /// @return The runtime context of this Scop. + __isl_give isl_set *getRuntimeContext() const; + /// @brief Build all alias groups for this SCoP. /// /// @returns True if __no__ error occurred, false otherwise. @@ -1065,6 +1095,9 @@ /// @brief Get an isl string representing the assumed context. std::string getAssumedContextStr() const; + /// @brief Get an isl string representing the boundary context. + std::string getBoundaryContextStr() const; + /// @brief Return the stmt for the given @p BB or nullptr if none. ScopStmt *getStmtForBasicBlock(BasicBlock *BB) const; Index: include/polly/Support/SCEVAffinator.h =================================================================== --- include/polly/Support/SCEVAffinator.h +++ include/polly/Support/SCEVAffinator.h @@ -34,6 +34,7 @@ namespace llvm { class Region; +class DataLayout; class ScalarEvolution; } @@ -57,6 +58,14 @@ __isl_give isl_pw_aff *getPwAff(const llvm::SCEV *E, const ScopStmt *Stmt = nullptr); + /// @brief Compute the context in which integer wrapping is happending. + /// + /// This context contains all parameter configurations for which we + /// know that the wrapping and non-wrapping expressions are different. + /// + /// @returns The context in which integer wrapping is happening. + __isl_give isl_set *getWrappingContext() const; + private: /// @brief Key to identify cached expressions. using CacheKey = std::pair; @@ -71,6 +80,26 @@ llvm::ScalarEvolution &SE; const ScopStmt *Stmt; + /// @brief Target data for element size computing. + const llvm::DataLayout &TD; + + /// @brief Compute the non-wrapping version of @p PWA for type @p ExprType. + + /// @param PWA The piece-wise affine function that might wrap. + /// @param Type The type of the SCEV that was translated to @p PWA. + /// + /// @returns The expr @p PWA modulo the size constraints of @p ExprType. + __isl_give isl_pw_aff *addModuloSemantic(__isl_take isl_pw_aff *PWA, + llvm::Type *ExprType) const; + + /// @brief Compute the domain in which integer wrapping for @p PWA happens. + /// + /// @returns The domain in which integer wrapping happens or nullptr if empty. + __isl_give isl_set *getWrappingDomain(llvm::SCEV::NoWrapFlags Flags, + llvm::Type *ExprType, + __isl_keep isl_pw_aff *PWA, + const ScopStmt *Stmt) const; + int getLoopDepth(const llvm::Loop *L); __isl_give isl_pw_aff *visit(const llvm::SCEV *E); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -1162,7 +1162,15 @@ Stmt.realignParams(); } -void Scop::simplifyAssumedContext() { +__isl_give isl_set * +Scop::simplifyAssumptionContext(__isl_take isl_set *AssumptionContext) const { + AssumptionContext = isl_set_gist_params(AssumptionContext, + isl_union_set_params(getDomains())); + AssumptionContext = isl_set_gist_params(AssumptionContext, getContext()); + return AssumptionContext; +} + +void Scop::simplifyContexts() { // The parameter constraints of the iteration domains give us a set of // constraints that need to hold for all cases where at least a single // statement iteration is executed in the whole scop. We now simplify the @@ -1191,9 +1199,8 @@ // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as // otherwise we would access out of bound data. Now, knowing that code is // only executed for the case m >= 0, it is sufficient to assume p >= 0. - AssumedContext = - isl_set_gist_params(AssumedContext, isl_union_set_params(getDomains())); - AssumedContext = isl_set_gist_params(AssumedContext, getContext()); + AssumedContext = simplifyAssumptionContext(AssumedContext); + BoundaryContext = simplifyAssumptionContext(BoundaryContext); } /// @brief Add the minimal/maximal access in @p Set to @p User. @@ -1464,7 +1471,8 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, isl_ctx *Context, unsigned MaxLoopDepth) : SE(&ScalarEvolution), R(R), IsOptimized(false), - MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this) {} + MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this), + BoundaryContext(nullptr) {} void Scop::initFromTempScop(TempScop &TempScop, LoopInfo &LI, ScopDetection &SD) { @@ -1480,7 +1488,11 @@ realignParams(); addParameterBounds(); - simplifyAssumedContext(); + + BoundaryContext = Affinator.getWrappingContext(); + BoundaryContext = isl_set_intersect_params(BoundaryContext, getContext()); + BoundaryContext = isl_set_complement(BoundaryContext); + simplifyContexts(); assert(NestLoops.empty() && "NestLoops not empty at top level!"); } @@ -1498,6 +1510,7 @@ Scop::~Scop() { isl_set_free(Context); isl_set_free(AssumedContext); + isl_set_free(BoundaryContext); isl_schedule_free(Schedule); // Free the alias groups @@ -1534,6 +1547,9 @@ std::string Scop::getAssumedContextStr() const { return stringFromIslObj(AssumedContext); } +std::string Scop::getBoundaryContextStr() const { + return stringFromIslObj(BoundaryContext); +} std::string Scop::getNameStr() const { std::string ExitName, EntryName; @@ -1566,6 +1582,17 @@ AssumedContext = isl_set_coalesce(AssumedContext); } +__isl_give isl_set *Scop::getBoundaryContext() const { + return isl_set_copy(BoundaryContext); +} + +__isl_give isl_set *Scop::getRuntimeContext() const { + isl_set *RTContext = getAssumedContext(); + RTContext = isl_set_intersect(RTContext, getBoundaryContext()); + RTContext = simplifyAssumptionContext(RTContext); + return RTContext; +} + void Scop::printContext(raw_ostream &OS) const { OS << "Context:\n"; @@ -1584,6 +1611,14 @@ OS.indent(4) << getAssumedContextStr() << "\n"; + OS.indent(4) << "Boundary Context:\n"; + if (!BoundaryContext) { + OS.indent(4) << "n/a\n\n"; + return; + } + + OS.indent(4) << getBoundaryContextStr() << "\n"; + for (const SCEV *Parameter : Parameters) { int Dim = ParameterIds.find(Parameter)->second; OS.indent(4) << "p" << Dim << ": " << *Parameter << "\n"; Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -327,9 +327,9 @@ void IslAst::buildRunCondition(__isl_keep isl_ast_build *Build) { // The conditions that need to be checked at run-time for this scop are - // available as an isl_set in the AssumedContext from which we can directly + // available as an isl_set in the runtime context from which we can directly // derive a run-time condition. - RunCondition = isl_ast_build_expr_from_set(Build, S->getAssumedContext()); + RunCondition = isl_ast_build_expr_from_set(Build, S->getRuntimeContext()); // Create the alias checks from the minimal/maximal accesses in each alias // group which consists of read only and non read only (read write) accesses. Index: lib/Support/SCEVAffinator.cpp =================================================================== --- lib/Support/SCEVAffinator.cpp +++ lib/Support/SCEVAffinator.cpp @@ -18,16 +18,20 @@ #include "polly/Support/ScopHelper.h" #include "polly/Support/SCEVValidator.h" +#include "llvm/IR/Module.h" + #include "isl/aff.h" #include "isl/set.h" #include "isl/val.h" +#include "isl/union_set.h" #include "isl/local_space.h" using namespace llvm; using namespace polly; SCEVAffinator::SCEVAffinator(Scop *S) - : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()) {} + : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()), + TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {} SCEVAffinator::~SCEVAffinator() { for (const auto &CachedPair : CachedExpressions) @@ -48,6 +52,94 @@ return visit(Expr); } +__isl_give isl_set * +SCEVAffinator::getWrappingDomain(SCEV::NoWrapFlags Flags, Type *ExprType, + __isl_take isl_pw_aff *PWA, + const ScopStmt *Stmt) const { + // If the SCEV flags do contain NSW (no signed wrap) then PWA already + // represents Expr in modulo semantic (it is not allowed to overflow), thus we + // are done. Otherwise, we will compute: + // PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1) + // whereas n is the number of bits of the Expr, hence: + // n = bitwidth(ExprType) + + if (Flags & SCEV::FlagNSW) + return nullptr; + + isl_pw_aff *PWAMod = addModuloSemantic(isl_pw_aff_copy(PWA), ExprType); + if (isl_pw_aff_is_equal(PWA, PWAMod)) { + isl_pw_aff_free(PWAMod); + return nullptr; + } + + PWA = isl_pw_aff_copy(PWA); + + if (Stmt) { + isl_set *Domain = isl_set_reset_tuple_id(Stmt->getDomain()); + PWA = isl_pw_aff_intersect_domain(PWA, isl_set_copy(Domain)); + PWAMod = isl_pw_aff_intersect_domain(PWAMod, Domain); + } + + return isl_pw_aff_ne_set(PWA, PWAMod); +} + +__isl_give isl_set *SCEVAffinator::getWrappingContext() const { + + isl_union_set *WrappingDomain = isl_union_set_empty(S->getParamSpace()); + + for (const auto &CachedPair : CachedExpressions) { + const SCEV *Expr = CachedPair.first.first; + SCEV::NoWrapFlags Flags; + + switch (Expr->getSCEVType()) { + case scAddExpr: + Flags = cast(Expr)->getNoWrapFlags(); + break; + case scMulExpr: + Flags = cast(Expr)->getNoWrapFlags(); + break; + case scAddRecExpr: + Flags = cast(Expr)->getNoWrapFlags(); + break; + default: + continue; + } + + const ScopStmt *Stmt = CachedPair.first.second; + isl_pw_aff *PWA = CachedPair.second; + + isl_set *WDom = getWrappingDomain(Flags, Expr->getType(), PWA, Stmt); + if (!WDom) + continue; + + WrappingDomain = isl_union_set_add_set(WrappingDomain, WDom); + } + + return isl_set_coalesce(isl_union_set_params(WrappingDomain)); +} + +__isl_give isl_pw_aff *SCEVAffinator::addModuloSemantic(isl_pw_aff *PWA, + Type *ExprType) const { + unsigned Width = TD.getTypeStoreSizeInBits(ExprType); + isl_ctx *Ctx = isl_pw_aff_get_ctx(PWA); + + isl_val *ModVal = isl_val_int_from_ui(Ctx, Width); + ModVal = isl_val_2exp(ModVal); + + isl_val *AddVal = isl_val_int_from_ui(Ctx, Width - 1); + AddVal = isl_val_2exp(AddVal); + + isl_set *Domain = isl_pw_aff_domain(isl_pw_aff_copy(PWA)); + + isl_pw_aff *AddPW = isl_pw_aff_val_on_domain(Domain, AddVal); + + PWA = isl_pw_aff_add(PWA, isl_pw_aff_copy(AddPW)); + PWA = isl_pw_aff_mod_val(PWA, ModVal); + PWA = isl_pw_aff_sub(PWA, AddPW); + + return PWA; +} + __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) { auto Key = std::make_pair(Expr, Stmt); @@ -74,6 +166,11 @@ } PWA = SCEVVisitor::visit(Expr); + + // For compile time reasons we need to simplify the PWA before we cache and + // return it. + PWA = isl_pw_aff_coalesce(PWA); + CachedExpressions[Key] = PWA; return isl_pw_aff_copy(PWA); } @@ -124,8 +221,6 @@ Sum = isl_pw_aff_add(Sum, NextSummand); } - // TODO: Check for NSW and NUW. - return Sum; } @@ -166,7 +261,6 @@ isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); - // TODO: Do we need to check for NSW and NUW? return isl_pw_aff_mul(Step, LPwAff); } Index: test/DependenceInfo/sequential_loops.ll =================================================================== --- test/DependenceInfo/sequential_loops.ll +++ test/DependenceInfo/sequential_loops.ll @@ -272,7 +272,11 @@ ; VALUE: RAW dependences: ; VALUE: [p] -> { ; VALUE: Stmt_S1[i0] -> Stmt_S2[-p + i0] : -; VALUE: p <= 190 and i0 >= p and i0 <= 9 + p and i0 >= 0 and i0 <= 99 +; VALUE-DAG: p <= 190 +; VALUE-DAG: i0 >= p +; VALUE-DAG: i0 <= 9 + p +; VALUE-DAG: i0 <= 99 +; VALUE-DAG: i0 >= 0 ; VALUE: } ; VALUE: WAR dependences: ; VALUE: [p] -> { Index: test/Isl/Ast/aliasing_parametric_simple_2.ll =================================================================== --- test/Isl/Ast/aliasing_parametric_simple_2.ll +++ test/Isl/Ast/aliasing_parametric_simple_2.ll @@ -5,7 +5,7 @@ ; A[i] = B[c - 10] + B[5]; ; } ; -; CHECK: if (1 && (&MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0] || &MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10])) +; CHECK: if (c >= -2147483638 && (&MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0] || &MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10])) ; CHECK: for (int c0 = 0; c0 <= 1023; c0 += 1) ; CHECK: Stmt_for_body(c0); ; CHECK: else Index: test/Isl/Ast/simple-run-time-condition.ll =================================================================== --- test/Isl/Ast/simple-run-time-condition.ll +++ test/Isl/Ast/simple-run-time-condition.ll @@ -18,9 +18,9 @@ ; cause any code to be executed are not generated. ; CHECK: if ( -; CHECK: (o >= 1 && q <= 0 && m + q >= 0) -; CHECK: || -; CHECK; (o <= 0 && m + q >= 100 && q <= 100) +; CHECK-DAG: (o >= 1 && n + p <= 9223372036854775808 && q <= 0 && m + q >= 0) +; CHECK-DAG: || +; CHECK-DAG: (o <= 0 && n + p <= 9223372036854775808 && m + q >= 100 && q <= 100) ; CHECK: ) ; CHECK: if (o >= 1) { Index: test/Isl/CodeGen/aliasing_parametric_simple_2.ll =================================================================== --- test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -5,6 +5,8 @@ ; A[i] = B[c - 10] + B[5]; ; } ; +; CHECK: %[[OV0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 +; CHECK: %[[OV1:[._a-zA-Z0-9]*]] = icmp sge i64 %[[OV0]], -2147483638 ; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 ; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15 ; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64 @@ -22,7 +24,7 @@ ; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] -; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]] +; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %[[OV1]], %[[NoAlias]] ; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/Isl/CodeGen/pointer-type-expressions-2.ll =================================================================== --- test/Isl/CodeGen/pointer-type-expressions-2.ll +++ test/Isl/CodeGen/pointer-type-expressions-2.ll @@ -23,7 +23,7 @@ ; Check that we transform this into a pointer difference. -; CODEGEN: %0 = ptrtoint i8* %end to i64 -; CODEGEN: %1 = ptrtoint i8* %start to i64 -; CODEGEN: %2 = sub i64 %0, %1 +; CODEGEN: %[[r0:[._a-zA-Z0-9]]] = ptrtoint i8* %end to i64 +; CODEGEN: %[[r1:[._a-zA-Z0-9]]] = ptrtoint i8* %start to i64 +; CODEGEN: %[[r2:[._a-zA-Z0-9]]] = sub i64 %[[r0]], %[[r1]] Index: test/Isl/CodeGen/pointer-type-expressions.ll =================================================================== --- test/Isl/CodeGen/pointer-type-expressions.ll +++ test/Isl/CodeGen/pointer-type-expressions.ll @@ -41,6 +41,7 @@ ; CHECK: Stmt_store(c0); ; CHECK: } -; CODEGEN: %0 = bitcast float* %P to i8* -; CODEGEN: %1 = icmp ule i8* %0, inttoptr (i64 -1 to i8*) +; CODEGEN: %[[R0:[0-9]*]] = bitcast float* %P to i8* +; CODEGEN: %[[R1:[0-9]*]] = bitcast float* %P to i8* +; CODEGEN-NEXT: icmp ule i8* %[[R1]], inttoptr (i64 -1 to i8*) Index: test/Isl/CodeGen/two-scops-in-row.ll =================================================================== --- test/Isl/CodeGen/two-scops-in-row.ll +++ test/Isl/CodeGen/two-scops-in-row.ll @@ -3,7 +3,7 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -polly-ignore-aliasing -disable-output < %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -; SCALAR: if (1) +; SCALAR: if (Scalar0.val >= -2147483548 && Scalar0.val <= 2147483646) ; SCALAR: { ; SCALAR: for (int c0 = 0; c0 <= -Scalar0.val + 99; c0 += 1) ; SCALAR: Stmt_for_1(c0); Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -12,7 +12,19 @@ ; INNERMOST: Region: %bb15---%bb26 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: -; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 } +; INNERMOST: [p_0, p_1, p_2] -> { : +; INNERMOST-DAG: p_0 >= 0 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_0 <= 2147483647 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_1 >= 0 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_1 <= 4096 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_2 >= 0 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_2 <= 4096 +; INNERMOST: } ; INNERMOST: Assumed Context: ; INNERMOST: [p_0, p_1, p_2] -> { : } ; INNERMOST: p0: {0,+,{0,+,1}<%bb11>}<%bb13> Index: test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll =================================================================== --- test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll +++ test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll @@ -5,7 +5,11 @@ ; INNERMOST: Region: %bb9---%bb18 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: -; INNERMOST: [p_0] -> { : p_0 >= -2199023255552 and p_0 <= 2199023254528 } +; INNERMOST: [p_0] -> { : +; INNERMOST-DAG: p_0 >= -2199023255552 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_0 <= 2199023254528 +; INNERMOST: } ; INNERMOST: Assumed Context: ; INNERMOST: [p_0] -> { : } ; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3> Index: test/ScopInfo/assume_gep_bounds.ll =================================================================== --- test/ScopInfo/assume_gep_bounds.ll +++ test/ScopInfo/assume_gep_bounds.ll @@ -20,6 +20,8 @@ ; CHECK-DAG: p <= 30 ; CHECK-DAG: and ; CHECK-DAG: m <= 20 +; CHECK-DAG: and +; CHECK-DAG: p <= 2305843009213694582 - 600n - 30m ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/assume_gep_bounds_2.ll =================================================================== --- test/ScopInfo/assume_gep_bounds_2.ll +++ test/ScopInfo/assume_gep_bounds_2.ll @@ -16,10 +16,14 @@ ; accessed. In this case the value of m does not matter. ; CHECK: Assumed Context: -; CHECK-NEXT: [n, m, p] -> { : -; CHECK-DAG: (n >= 1 and m <= 20 and p <= 20) +; CHECK-NEXT: [n, m, p] -> { : (n <= 0 and p <= 20) or (n >= 1 and m <= 20 and p <= 20) } +; CHECK: Boundary Context: +; CHECK-NEXT: [n, m, p] -> { : +; CHECK-DAG: (n <= 0 and p <= 0) ; CHECK-DAG: or -; CHECK-DAG: (n <= 0 and p <= 20) +; CHECK-DAG: (n >= 1 and m <= 2305843009213693972 - 20n and p <= 0) +; CHECK-DAG: or +; CHECK-DAG: (n >= 1 and m <= 2305843009213693972 - 20n and p <= 2305843009213693972 - 20m and p >= 1) ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/loop_carry.ll =================================================================== --- test/ScopInfo/loop_carry.ll +++ test/ScopInfo/loop_carry.ll @@ -45,8 +45,6 @@ ret i64 0 } -; CHECK: Context: -; CHECK: [n] -> { : } ; CHECK: Statements { ; CHECK: Stmt_bb ; CHECK: Domain := Index: test/ScopInfo/multidim_2d_outer_parametric_offset.ll =================================================================== --- test/ScopInfo/multidim_2d_outer_parametric_offset.ll +++ test/ScopInfo/multidim_2d_outer_parametric_offset.ll @@ -10,7 +10,7 @@ ; } ; CHECK: Assumed Context: -; CHECK: [m, p] -> { : } +; CHECK: [m, p] -> { : p <= 9223372036854775708 } ; CHECK: p0: %m ; CHECK: p1: %p ; CHECK: Statements { Index: test/ScopInfo/pointer-type-expressions.ll =================================================================== --- test/ScopInfo/pointer-type-expressions.ll +++ test/ScopInfo/pointer-type-expressions.ll @@ -19,7 +19,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/ranged_parameter.ll =================================================================== --- test/ScopInfo/ranged_parameter.ll +++ test/ScopInfo/ranged_parameter.ll @@ -4,7 +4,11 @@ ; range metadata (see bottom of the file) are present: ; ; CHECK: Context: -; CHECK: [p_0] -> { : p_0 >= 0 and p_0 <= 255 } +; CHECK: [p_0] -> { : +; CHECK-DAG: p_0 >= 0 +; CHECK-DAG: and +; CHECK-DAG: p_0 <= 255 +; CHECK: } ; ; void jd(int *A, int *p /* in [0,256) */) { ; for (int i = 0; i < 1024; i++) Index: test/ScopInfo/simple_loop_1.ll =================================================================== --- test/ScopInfo/simple_loop_1.ll +++ test/ScopInfo/simple_loop_1.ll @@ -14,7 +14,7 @@ bb: ; preds = %bb, %entry %i = phi i64 [ 0, %entry ], [ %i.inc, %bb ] - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep %i.inc = add nsw i64 %i, 1 %exitcond = icmp eq i64 %i.inc, %N Index: test/ScopInfo/unsigned-condition.ll =================================================================== --- test/ScopInfo/unsigned-condition.ll +++ test/ScopInfo/unsigned-condition.ll @@ -19,7 +19,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/wraping_signed_expr_0.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_0.ll @@ -0,0 +1,71 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, char N, char p) { +; for (char i = 0; i < N; i++) { +; A[i + 3] = 0; +; } +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; CHECK: Function: wrap +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 125 } +; +; +; FIXME: This is a negative test as nowrap should not need an assumed context. +; However %tmp5 in @nowrap is translated to the SCEV <3,+,1><%bb2> +; which lacks the flags we would need to avoid runtime checks. +; +; CHECK: Function: nowrap +; CHECK: Boundary Context: +; CHECK-NOT: [N] -> { : } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i8 %indvars.iv, 3 + %tmp6 = getelementptr i32, i32* %A, i8 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i32* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i8 %indvars.iv, 3 + %tmp6 = getelementptr inbounds i32, i32* %A, i8 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_1.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(long *A, long N, long p) { +; for (long i = 0; i < N; i++) +; A[i + 1] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; Note: +; 1152921504606846975 * sizeof(long) <= 2 ^ 63 - 1 +; and +; 1152921504606846976 * sizeof(long) > 2 ^ 63 - 1 +; with +; sizeof(long) == 8 +; +; CHECK: Function: wrap +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 1152921504606846975 } +; +; CHECK: Function: nowrap +; CHECK: Boundary Context: +; CHECK: [N] -> { : } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr inbounds i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_2.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + 30] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; Note: 2147483618 + 30 == 2 ^ 31 +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N] -> { : N <= 2147483647 and N >= -2147483648 } +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 2147483618 } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i32 %indvars.iv, 30 + %tmp6 = getelementptr i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_3.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_3.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + p] = 0; +; } +; +; Note: 2147483648 == 2 ^ 31 +; +; CHECK: Function: wrap +; CHECK: Boundary Context: +; CHECK: [N, p] -> { : p <= 2147483648 - N } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i32 %indvars.iv, %p + %tmp6 = getelementptr inbounds i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_4.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_4.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(char *A, char N, char p) { +; for (char i = 0; i < N; i++) +; A[p-1] = 0; +; } +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N, p] -> { : N <= 127 and N >= -128 and p <= 127 and p >= -128 } +; CHECK: Boundary Context: +; CHECK: [N, p] -> { : p >= -127 } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i8* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i8 %p, -1 + %tmp6 = getelementptr i8, i8* %A, i8 %tmp5 + store i8 0, i8* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_5.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_5.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; We should not generate runtime check for ((int)r1 + (int)r2) as it is known not +; to overflow. However (p + q) can, thus checks are needed. +; +; CHECK: Boundary Context: +; CHECK: [r1, r2, q, p] -> { +; CHECK-DAG: p <= 2147483647 - q +; CHECK-DAG: and +; CHECK-DAG: p >= -2147483648 - q +; CHECK: } +; +; void wraps(int *A, int p, short q, char r1, char r2) { +; for (char i = r1; i < r2; i++) +; A[p + q] = A[(int)r1 + (int)r2]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wraps(i32* %A, i32 %p, i16 signext %q, i8 signext %r1, i8 signext %r2) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i8 [ %r1, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i8 %i.0, %r2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv3 = sext i8 %r1 to i64 + %conv4 = sext i8 %r2 to i64 + %add = add nsw i64 %conv3, %conv4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add + %tmp = load i32, i32* %arrayidx, align 4 + %conv5 = sext i16 %q to i32 + %add6 = add nsw i32 %conv5, %p + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %idxprom7 + store i32 %tmp, i32* %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i8 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/wraping_signed_expr_slow_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_slow_1.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; This checks that the no-wraps checks will be computed fast as some example +; already showed huge slowdowns even though the inbounds and nsw flags were +; all in place. +; +; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark. +; void fast(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; short index0 = (short)(i + N); +; #ifdef fast +; short index1 = (index0 * 1) + (short)M; +; #else +; short index1 = (index0 * 16) + (short)M; +; #endif +; A[index1]++; +; } +; } +; +; CHECK: Function: fast +; CHECK: Function: slow +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @fast(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %tmp3ext = sext i8 %tmp3 to i16 + ;%mul = mul nsw i16 %tmp3ext, 16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %tmp3ext, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @slow(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %tmp3ext = sext i8 %tmp3 to i16 + %mul = mul nsw i16 %tmp3ext, 16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %mul, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/wraping_signed_expr_slow_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_slow_2.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; This checks that the no-wraps checks will be computed fast as some example +; already showed huge slowdowns even though the inbounds and nsw flags were +; all in place. +; +; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark. +; void fast(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; char index0 = i + N; +; char index1 = index0 * 16; +; char index2 = index1 + M; +; A[(short)index2]++; +; } +; } +; +; void slow(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; char index0 = i + N; +; char index1 = index0 * 16; +; short index2 = ((short)index1) + ((short)M); +; A[index2]++; +; } +; } +; +; CHECK: Function: fast +; CHECK: Function: slow +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @fast(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %mul = mul nsw i8 %tmp3, 16 + %add2 = add nsw i8 %mul, %M + %add2ext = sext i8 %add2 to i16 + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2ext + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @slow(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %mul = mul nsw i8 %tmp3, 16 + %mulext = sext i8 %mul to i16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %mulext, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}