Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -841,6 +841,17 @@ /// this scop and that need to be code generated as a run-time test. isl_set *AssumedContext; + /// @brief The boundary assumptions under which this scop was built. + /// + /// The boundary context is similar to the assumed context as it contains + /// constraints over the parameters we assume to be true. However, the + /// boundary context is less useful for dependence analysis and + /// simplification purposes as it contains only constraints that affect the + /// boundaries of the parameter ranges. As these constraints can become quite + /// complex, the boundary context and the assumed context are separated as a + /// meassure to save compile time. + isl_set *BoundaryContext; + /// @brief The schedule of the SCoP /// /// The schedule of the SCoP describes the execution order of the statements @@ -959,14 +970,17 @@ /// @brief Build the Context of the Scop. void buildContext(); + /// @brief Build the BoundaryContext based on the wrapping of expressions. + void buildBoundaryContext(); + /// @brief Add user provided parameter constraints to context. void addUserContext(); /// @brief Add the bounds of the parameters to the context. void addParameterBounds(); - /// @brief Simplify the assumed context. - void simplifyAssumedContext(); + /// @brief Simplify the assumed and boundary context. + void simplifyContexts(); /// @brief Create a new SCoP statement for either @p BB or @p R. /// @@ -1136,6 +1150,21 @@ /// to hold. void addAssumption(__isl_take isl_set *Set); + /// @brief Get the boundary context for this Scop. + /// + /// @return The boundary context of this Scop. + __isl_give isl_set *getBoundaryContext() const; + + /// @brief Get the runtime context for this Scop. + /// + /// The runtime context is the intersection of the assumed and the boundary + /// context. It is used in the AST generation to build the runtime checks that + /// are used to decide if the optimized or original version of the SCoP will + /// be exectued. + /// + /// @return The runtime context of this Scop. + __isl_give isl_set *getRuntimeContext() const; + /// @brief Build the alias checks for this SCoP. void buildAliasChecks(AliasAnalysis &AA); @@ -1155,6 +1184,9 @@ /// @brief Get an isl string representing the assumed context. std::string getAssumedContextStr() const; + /// @brief Get an isl string representing the boundary context. + std::string getBoundaryContextStr() const; + /// @brief Return the stmt for the given @p BB or nullptr if none. ScopStmt *getStmtForBasicBlock(BasicBlock *BB) const; @@ -1221,14 +1253,18 @@ /// @param Domain An (optional) domain in which the isl_pw_aff is computed. /// SCEVs known to not reference any loops in the SCoP can be /// passed without a @p Domain. - __isl_give isl_pw_aff *getPwAff(const SCEV *E, - __isl_keep isl_set *Domain = nullptr); + __isl_give isl_pw_aff *getPwAff(const SCEV *E, BasicBlock *BB = nullptr); /// @brief Return the non-loop carried conditions on the domain of @p Stmt. /// /// @param Stmt The statement for which the conditions should be returned. __isl_give isl_set *getDomainConditions(ScopStmt *Stmt); + /// @brief Return the non-loop carried conditions on the domain of @p BB. + /// + /// @param BB The block for which the conditions should be returned. + __isl_give isl_set *getDomainConditions(BasicBlock *BB); + /// @brief Get a union set containing the iteration domains of all statements. __isl_give isl_union_set *getDomains() const; Index: include/polly/Support/SCEVAffinator.h =================================================================== --- include/polly/Support/SCEVAffinator.h +++ include/polly/Support/SCEVAffinator.h @@ -34,6 +34,8 @@ namespace llvm { class Region; +class BasicBlock; +class DataLayout; class ScalarEvolution; } @@ -49,16 +51,24 @@ /// @brief Translate a SCEV to an isl_pw_aff. /// - /// @param E The expression that is translated. - /// @param Domain The domain in which @p E is executed. + /// @param E he expression that is translated. + /// @param BB The block in which @p E is executed. /// /// @returns The isl representation of the SCEV @p E in @p Domain. __isl_give isl_pw_aff *getPwAff(const llvm::SCEV *E, - __isl_keep isl_set *Domain = nullptr); + llvm::BasicBlock *BB = nullptr); + + /// @brief Compute the context in which integer wrapping is happending. + /// + /// This context contains all parameter configurations for which we + /// know that the wrapping and non-wrapping expressions are different. + /// + /// @returns The context in which integer wrapping is happening. + __isl_give isl_set *getWrappingContext() const; private: /// @brief Key to identify cached expressions. - using CacheKey = std::pair; + using CacheKey = std::pair; /// @brief Map to remembered cached expressions. llvm::DenseMap CachedExpressions; @@ -68,7 +78,30 @@ unsigned NumIterators; const llvm::Region &R; llvm::ScalarEvolution &SE; - isl_set *Domain; + llvm::BasicBlock *BB; + + /// @brief Target data for element size computing. + const llvm::DataLayout &TD; + + /// @brief Compute the non-wrapping version of @p PWA for type @p ExprType. + /// + /// @param PWA The piece-wise affine function that might wrap. + /// @param Type The type of the SCEV that was translated to @p PWA. + /// + /// @returns The expr @p PWA modulo the size constraints of @p ExprType. + __isl_give isl_pw_aff *addModuloSemantic(__isl_take isl_pw_aff *PWA, + llvm::Type *ExprType) const; + + /// @brief Compute the context in which integer wrapping for @p PWA happens. + /// + /// @returns The context in which integer wrapping happens or nullptr if + /// empty. + __isl_give isl_set *getWrappingContext(llvm::SCEV::NoWrapFlags Flags, + llvm::Type *ExprType, + __isl_keep isl_pw_aff *PWA, + __isl_keep isl_set *ExprDomain) const; + + int getLoopDepth(const llvm::Loop *L); __isl_give isl_pw_aff *visit(const llvm::SCEV *E); __isl_give isl_pw_aff *visitConstant(const llvm::SCEVConstant *E); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -699,7 +699,8 @@ } __isl_give isl_pw_aff *ScopStmt::getPwAff(const SCEV *E) { - return getParent()->getPwAff(E, Domain); + return getParent()->getPwAff(E, isBlockStmt() ? getBasicBlock() + : getRegion()->getEntry()); } void ScopStmt::restrictDomain(__isl_take isl_set *NewDomain) { @@ -797,9 +798,10 @@ "Condition of exiting branch was neither constant nor ICmp!"); ScalarEvolution &SE = *S.getSE(); + BasicBlock *BB = BI->getParent(); isl_pw_aff *LHS, *RHS; - LHS = S.getPwAff(SE.getSCEVAtScope(ICond->getOperand(0), L), Domain); - RHS = S.getPwAff(SE.getSCEVAtScope(ICond->getOperand(1), L), Domain); + LHS = S.getPwAff(SE.getSCEVAtScope(ICond->getOperand(0), L), BB); + RHS = S.getPwAff(SE.getSCEVAtScope(ICond->getOperand(1), L), BB); ConsequenceCondSet = buildConditionSet(ICond->getPredicate(), LHS, RHS); } @@ -1201,6 +1203,12 @@ return isl_set_intersect_params(C, DomainContext); } +void Scop::buildBoundaryContext() { + BoundaryContext = Affinator.getWrappingContext(); + BoundaryContext = isl_set_complement(BoundaryContext); + BoundaryContext = isl_set_gist_params(BoundaryContext, getContext()); +} + void Scop::addUserContext() { if (UserContextStr.empty()) return; @@ -1281,7 +1289,16 @@ Stmt.realignParams(); } -void Scop::simplifyAssumedContext() { +static __isl_give isl_set * +simplifyAssumptionContext(__isl_take isl_set *AssumptionContext, + const Scop &S) { + isl_set *DomainParameters = isl_union_set_params(S.getDomains()); + AssumptionContext = isl_set_gist_params(AssumptionContext, DomainParameters); + AssumptionContext = isl_set_gist_params(AssumptionContext, S.getContext()); + return AssumptionContext; +} + +void Scop::simplifyContexts() { // The parameter constraints of the iteration domains give us a set of // constraints that need to hold for all cases where at least a single // statement iteration is executed in the whole scop. We now simplify the @@ -1310,9 +1327,8 @@ // we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as // otherwise we would access out of bound data. Now, knowing that code is // only executed for the case m >= 0, it is sufficient to assume p >= 0. - AssumedContext = - isl_set_gist_params(AssumedContext, isl_union_set_params(getDomains())); - AssumedContext = isl_set_gist_params(AssumedContext, getContext()); + AssumedContext = simplifyAssumptionContext(AssumedContext, *this); + BoundaryContext = simplifyAssumptionContext(BoundaryContext, *this); } /// @brief Add the minimal/maximal access in @p Set to @p User. @@ -1447,6 +1463,11 @@ isl_set *Scop::getDomainConditions(ScopStmt *Stmt) { BasicBlock *BB = Stmt->isBlockStmt() ? Stmt->getBasicBlock() : Stmt->getRegion()->getEntry(); + return getDomainConditions(BB); +} + +isl_set *Scop::getDomainConditions(BasicBlock *BB) { + assert(DomainMap.count(BB) && "Requested BB did not have a domain"); return isl_set_copy(DomainMap[BB]); } @@ -1494,7 +1515,8 @@ BasicBlock *BB = getRegionNodeBasicBlock(RN); isl_set *Domain = DomainMap[BB]; - DEBUG(dbgs() << "\tVisit: " << BB->getName() << " : " << Domain << "\n"); + DEBUG(dbgs() << "\tVisit: " << BB->getName() << " : "; + isl_set_dump(Domain)); assert(Domain && "Due to reverse post order traversal of the region all " "predecessor of the current region node should have been " "visited and a domain for this region node should have " @@ -1560,8 +1582,8 @@ SuccDomain = isl_set_union(SuccDomain, CondSet); SuccDomain = isl_set_coalesce(SuccDomain); - DEBUG(dbgs() << "\tSet SuccBB: " << SuccBB->getName() << " : " << Domain - << "\n"); + DEBUG(dbgs() << "\tSet SuccBB: " << SuccBB->getName() << " : "; + isl_set_dump(SuccDomain)); } } } @@ -1968,7 +1990,8 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, DominatorTree &DT, isl_ctx *Context, unsigned MaxLoopDepth) : DT(DT), SE(&ScalarEvolution), R(R), IsOptimized(false), - MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this) {} + MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this), + BoundaryContext(nullptr) {} void Scop::initFromTempScop(TempScop &TempScop, LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) { @@ -1987,7 +2010,8 @@ realignParams(); addParameterBounds(); addUserContext(); - simplifyAssumedContext(); + buildBoundaryContext(); + simplifyContexts(); buildAliasChecks(AA); assert(NestLoops.empty() && "NestLoops not empty at top level!"); @@ -2008,6 +2032,7 @@ Scop::~Scop() { isl_set_free(Context); isl_set_free(AssumedContext); + isl_set_free(BoundaryContext); isl_schedule_free(Schedule); for (auto It : DomainMap) @@ -2047,6 +2072,9 @@ std::string Scop::getAssumedContextStr() const { return stringFromIslObj(AssumedContext); } +std::string Scop::getBoundaryContextStr() const { + return stringFromIslObj(BoundaryContext); +} std::string Scop::getNameStr() const { std::string ExitName, EntryName; @@ -2092,6 +2120,17 @@ AssumedContext = isl_set_coalesce(AssumedContext); } +__isl_give isl_set *Scop::getBoundaryContext() const { + return isl_set_copy(BoundaryContext); +} + +__isl_give isl_set *Scop::getRuntimeContext() const { + isl_set *RTContext = getAssumedContext(); + RTContext = isl_set_intersect(RTContext, getBoundaryContext()); + RTContext = simplifyAssumptionContext(RTContext, *this); + return RTContext; +} + void Scop::printContext(raw_ostream &OS) const { OS << "Context:\n"; @@ -2110,6 +2149,14 @@ OS.indent(4) << getAssumedContextStr() << "\n"; + OS.indent(4) << "Boundary Context:\n"; + if (!BoundaryContext) { + OS.indent(4) << "n/a\n\n"; + return; + } + + OS.indent(4) << getBoundaryContextStr() << "\n"; + for (const SCEV *Parameter : Parameters) { int Dim = ParameterIds.find(Parameter)->second; OS.indent(4) << "p" << Dim << ": " << *Parameter << "\n"; @@ -2195,8 +2242,8 @@ isl_ctx *Scop::getIslCtx() const { return IslCtx; } -__isl_give isl_pw_aff *Scop::getPwAff(const SCEV *E, isl_set *Domain) { - return Affinator.getPwAff(E, Domain); +__isl_give isl_pw_aff *Scop::getPwAff(const SCEV *E, BasicBlock *BB) { + return Affinator.getPwAff(E, BB); } __isl_give isl_union_set *Scop::getDomains() const { Index: lib/Support/SCEVAffinator.cpp =================================================================== --- lib/Support/SCEVAffinator.cpp +++ lib/Support/SCEVAffinator.cpp @@ -27,22 +27,23 @@ using namespace polly; SCEVAffinator::SCEVAffinator(Scop *S) - : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()) {} + : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()), + TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {} SCEVAffinator::~SCEVAffinator() { - for (const auto &CachedPair : CachedExpressions) { + for (const auto &CachedPair : CachedExpressions) isl_pw_aff_free(CachedPair.second); - isl_set_free(CachedPair.first.second); - } } __isl_give isl_pw_aff *SCEVAffinator::getPwAff(const SCEV *Expr, - isl_set *Domain) { - this->Domain = Domain; - - if (Domain) - NumIterators = isl_set_n_dim(Domain); - else + BasicBlock *BB) { + this->BB = BB; + + if (BB) { + auto *DC = S->getDomainConditions(BB); + NumIterators = isl_set_n_dim(DC); + isl_set_free(DC); + } else NumIterators = 0; S->addParams(getParamsInAffineExpr(&R, Expr, SE)); @@ -50,12 +51,107 @@ return visit(Expr); } +__isl_give isl_set * +SCEVAffinator::getWrappingContext(SCEV::NoWrapFlags Flags, Type *ExprType, + __isl_keep isl_pw_aff *PWA, + __isl_take isl_set *ExprDomain) const { + // If the SCEV flags do contain NSW (no signed wrap) then PWA already + // represents Expr in modulo semantic (it is not allowed to overflow), thus we + // are done. Otherwise, we will compute: + // PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1) + // whereas n is the number of bits of the Expr, hence: + // n = bitwidth(ExprType) + + if (Flags & SCEV::FlagNSW) + return nullptr; + + isl_pw_aff *PWAMod = addModuloSemantic(isl_pw_aff_copy(PWA), ExprType); + if (isl_pw_aff_is_equal(PWA, PWAMod)) { + isl_pw_aff_free(PWAMod); + return nullptr; + } + + PWA = isl_pw_aff_copy(PWA); + + if (ExprDomain) { + isl_set_dump(ExprDomain); + ExprDomain = isl_set_reset_tuple_id(isl_set_copy(ExprDomain)); + isl_pw_aff_dump(PWA); + isl_pw_aff_dump(PWAMod); + PWA = isl_pw_aff_intersect_domain(PWA, isl_set_copy(ExprDomain)); + PWAMod = isl_pw_aff_intersect_domain(PWAMod, ExprDomain); + } + + auto *NotEqualSet = isl_pw_aff_ne_set(PWA, PWAMod); + NotEqualSet = isl_set_gist_params(NotEqualSet, S->getContext()); + NotEqualSet = isl_set_params(NotEqualSet); + return NotEqualSet; +} + +__isl_give isl_set *SCEVAffinator::getWrappingContext() const { + + isl_set *WrappingCtx = isl_set_empty(S->getParamSpace()); + + for (const auto &CachedPair : CachedExpressions) { + const SCEV *Expr = CachedPair.first.first; + SCEV::NoWrapFlags Flags; + + switch (Expr->getSCEVType()) { + case scAddExpr: + Flags = cast(Expr)->getNoWrapFlags(); + break; + case scMulExpr: + Flags = cast(Expr)->getNoWrapFlags(); + break; + case scAddRecExpr: + Flags = cast(Expr)->getNoWrapFlags(); + break; + default: + continue; + } + + isl_pw_aff *PWA = CachedPair.second; + BasicBlock *BB = CachedPair.first.second; + isl_set *ExprDomain = BB ? S->getDomainConditions(BB) : nullptr; + + isl_set *WPWACtx = + getWrappingContext(Flags, Expr->getType(), PWA, ExprDomain); + isl_set_free(ExprDomain); + + WrappingCtx = WPWACtx ? isl_set_union(WrappingCtx, WPWACtx) : WrappingCtx; + } + + return WrappingCtx; +} + +__isl_give isl_pw_aff * +SCEVAffinator::addModuloSemantic(__isl_take isl_pw_aff *PWA, + Type *ExprType) const { + unsigned Width = TD.getTypeStoreSizeInBits(ExprType); + isl_ctx *Ctx = isl_pw_aff_get_ctx(PWA); + + isl_val *ModVal = isl_val_int_from_ui(Ctx, Width); + ModVal = isl_val_2exp(ModVal); + + isl_val *AddVal = isl_val_int_from_ui(Ctx, Width - 1); + AddVal = isl_val_2exp(AddVal); + + isl_set *Domain = isl_pw_aff_domain(isl_pw_aff_copy(PWA)); + + isl_pw_aff *AddPW = isl_pw_aff_val_on_domain(Domain, AddVal); + + PWA = isl_pw_aff_add(PWA, isl_pw_aff_copy(AddPW)); + PWA = isl_pw_aff_mod_val(PWA, ModVal); + PWA = isl_pw_aff_sub(PWA, AddPW); + + return PWA; +} + __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) { - auto Key = std::make_pair(Expr, isl_set_copy(Domain)); + auto Key = std::make_pair(Expr, BB); isl_pw_aff *PWA = CachedExpressions[Key]; if (PWA) { - isl_set_free(Domain); return isl_pw_aff_copy(PWA); } @@ -77,6 +173,11 @@ } PWA = SCEVVisitor::visit(Expr); + + // For compile time reasons we need to simplify the PWA before we cache and + // return it. + PWA = isl_pw_aff_coalesce(PWA); + CachedExpressions[Key] = PWA; return isl_pw_aff_copy(PWA); } @@ -127,8 +228,6 @@ Sum = isl_pw_aff_add(Sum, NextSummand); } - // TODO: Check for NSW and NUW. - return Sum; } @@ -169,7 +268,6 @@ isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1); isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff); - // TODO: Do we need to check for NSW and NUW? return isl_pw_aff_mul(Step, LPwAff); } Index: test/DependenceInfo/sequential_loops.ll =================================================================== --- test/DependenceInfo/sequential_loops.ll +++ test/DependenceInfo/sequential_loops.ll @@ -272,7 +272,11 @@ ; VALUE: RAW dependences: ; VALUE: [p] -> { ; VALUE: Stmt_S1[i0] -> Stmt_S2[-p + i0] : -; VALUE: i0 >= 0 and i0 <= 9 + p and i0 >= p and i0 <= 99 and p <= 190 +; VALUE-DAG: p <= 190 +; VALUE-DAG: i0 >= p +; VALUE-DAG: i0 <= 9 + p +; VALUE-DAG: i0 <= 99 +; VALUE-DAG: i0 >= 0 ; VALUE: } ; VALUE: WAR dependences: ; VALUE: [p] -> { Index: test/Isl/CodeGen/pointer-type-expressions-2.ll =================================================================== --- test/Isl/CodeGen/pointer-type-expressions-2.ll +++ test/Isl/CodeGen/pointer-type-expressions-2.ll @@ -23,7 +23,7 @@ ; Check that we transform this into a pointer difference. -; CODEGEN: %0 = ptrtoint i8* %end to i64 -; CODEGEN: %1 = ptrtoint i8* %start to i64 -; CODEGEN: %2 = sub i64 %0, %1 +; CODEGEN: %[[r0:[._a-zA-Z0-9]]] = ptrtoint i8* %end to i64 +; CODEGEN: %[[r1:[._a-zA-Z0-9]]] = ptrtoint i8* %start to i64 +; CODEGEN: %[[r2:[._a-zA-Z0-9]]] = sub i64 %[[r0]], %[[r1]] Index: test/Isl/CodeGen/pointer-type-expressions.ll =================================================================== --- test/Isl/CodeGen/pointer-type-expressions.ll +++ test/Isl/CodeGen/pointer-type-expressions.ll @@ -41,6 +41,7 @@ ; CHECK: Stmt_store(c0); ; CHECK: } -; CODEGEN: %0 = bitcast float* %P to i8* -; CODEGEN: %1 = icmp ule i8* %0, inttoptr (i64 -1 to i8*) +; CODEGEN: %[[R0:[0-9]*]] = bitcast float* %P to i8* +; CODEGEN: %[[R1:[0-9]*]] = bitcast float* %P to i8* +; CODEGEN-NEXT: icmp ule i8* %[[R1]], inttoptr (i64 -1 to i8*) Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -18,7 +18,19 @@ ; INNERMOST: Region: %bb15---%bb26 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: -; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 } +; INNERMOST: [p_0, p_1, p_2] -> { : +; INNERMOST-DAG: p_0 >= 0 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_0 <= 2147483647 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_1 >= 0 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_1 <= 4096 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_2 >= 0 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_2 <= 4096 +; INNERMOST: } ; INNERMOST: Assumed Context: ; INNERMOST: [p_0, p_1, p_2] -> { : } ; INNERMOST: p0: {0,+,{0,+,1}<%bb11>}<%bb13> Index: test/ScopInfo/NonAffine/non_affine_conditional_surrounding_affine_loop.ll =================================================================== --- test/ScopInfo/NonAffine/non_affine_conditional_surrounding_affine_loop.ll +++ test/ScopInfo/NonAffine/non_affine_conditional_surrounding_affine_loop.ll @@ -10,7 +10,11 @@ ; INNERMOST: Region: %bb9---%bb17 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: -; INNERMOST: [N] -> { : N >= -2147483648 and N <= 2147483647 } +; INNERMOST: [N] -> { : +; INNERMOST-DAG: N >= -2147483648 +; INNERMOST-DAG: and +; INNERMOST-DAG: N <= 2147483647 +; INNERMOST } ; INNERMOST: Assumed Context: ; INNERMOST: [N] -> { : } ; INNERMOST: p0: %N Index: test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll =================================================================== --- test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll +++ test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll @@ -9,7 +9,11 @@ ; INNERMOST: Region: %bb9---%bb18 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: -; INNERMOST: [p_0] -> { : p_0 >= -2199023255552 and p_0 <= 2199023254528 } +; INNERMOST: [p_0] -> { : +; INNERMOST-DAG: p_0 >= -2199023255552 +; INNERMOST-DAG: and +; INNERMOST-DAG: p_0 <= 2199023254528 +; INNERMOST: } ; INNERMOST: Assumed Context: ; INNERMOST: [p_0] -> { : } ; INNERMOST: p0: {0,+,(sext i32 %N to i64)}<%bb3> Index: test/ScopInfo/NonAffine/non_affine_loop_used_later.ll =================================================================== --- test/ScopInfo/NonAffine/non_affine_loop_used_later.ll +++ test/ScopInfo/NonAffine/non_affine_loop_used_later.ll @@ -7,7 +7,11 @@ ; CHECK: Region: %bb2---%bb24 ; CHECK: Max Loop Depth: 1 ; CHECK: Context: -; CHECK: [N] -> { : N >= -2147483648 and N <= 2147483647 } +; CHECK: [N] -> { : +; CHECK-DAG: N >= -2147483648 +; CHECK-DAG: and +; CHECK-DAG: N <= 2147483647 +; CHECK: } ; CHECK: Assumed Context: ; CHECK: [N] -> { : } ; CHECK: p0: %N Index: test/ScopInfo/assume_gep_bounds.ll =================================================================== --- test/ScopInfo/assume_gep_bounds.ll +++ test/ScopInfo/assume_gep_bounds.ll @@ -20,6 +20,8 @@ ; CHECK-DAG: p <= 30 ; CHECK-DAG: and ; CHECK-DAG: m <= 20 +; CHECK-DAG: and +; CHECK-DAG: p <= 2305843009213694582 - 600n - 30m ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/assume_gep_bounds_2.ll =================================================================== --- test/ScopInfo/assume_gep_bounds_2.ll +++ test/ScopInfo/assume_gep_bounds_2.ll @@ -16,10 +16,14 @@ ; accessed. In this case the value of m does not matter. ; CHECK: Assumed Context: -; CHECK-NEXT: [n, m, p] -> { : -; CHECK-DAG: (n >= 1 and m <= 20 and p <= 20) +; CHECK-NEXT: [n, m, p] -> { : (n <= 0 and p <= 20) or (n >= 1 and m <= 20 and p <= 20) } +; CHECK: Boundary Context: +; CHECK-NEXT: [n, m, p] -> { : +; CHECK-DAG: (n <= 0 and m >= 115292150460684699 and p <= 0) ; CHECK-DAG: or -; CHECK-DAG: (n <= 0 and p <= 20) +; CHECK-DAG: (n >= 1 and m <= 2305843009213693972 - 20n and m >= 115292150460684699 and p <= 0) +; CHECK-DAG: or +; CHECK-DAG: (m <= 2305843009213693972 - 20n and m <= 115292150460684698 and p <= 2305843009213693972 - 20m) ; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/loop_carry.ll =================================================================== --- test/ScopInfo/loop_carry.ll +++ test/ScopInfo/loop_carry.ll @@ -45,8 +45,6 @@ ret i64 0 } -; CHECK: Context: -; CHECK: [n] -> { : } ; CHECK: Statements { ; CHECK: Stmt_bb ; CHECK: Domain := Index: test/ScopInfo/multidim_2d_outer_parametric_offset.ll =================================================================== --- test/ScopInfo/multidim_2d_outer_parametric_offset.ll +++ test/ScopInfo/multidim_2d_outer_parametric_offset.ll @@ -10,7 +10,7 @@ ; } ; CHECK: Assumed Context: -; CHECK: [m, p] -> { : } +; CHECK: [m, p] -> { : p <= 9223372036854775708 } ; CHECK: p0: %m ; CHECK: p1: %p ; CHECK: Statements { Index: test/ScopInfo/multidim_srem.ll =================================================================== --- test/ScopInfo/multidim_srem.ll +++ test/ScopInfo/multidim_srem.ll @@ -8,9 +8,21 @@ ; } ; ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 <= 1 and o0 >= 0) }; +; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): +; CHECK-DAG: 2e0 = -i0 + o0 +; CHECK-DAG: and +; CHECK-DAG: o0 <= 1 +; CHECK-DAG: and +; CHECK-DAG: o0 >= 0 +; CHECK: }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): 2e0 = -i0 + o0 and o0 <= 1 and o0 >= 0) }; +; CHECK: [n] -> { Stmt_for_body_8[i0, i1, i2] -> MemRef_A[o0, i1, i2] : exists (e0 = floor((-i0 + o0)/2): +; CHECK-DAG: 2e0 = -i0 + o0 +; CHECK-DAG: and +; CHECK-DAG: o0 <= 1 +; CHECK-DAG: and +; CHECK-DAG: o0 >= 0 +; CHECK: }; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopInfo/pointer-type-expressions.ll =================================================================== --- test/ScopInfo/pointer-type-expressions.ll +++ test/ScopInfo/pointer-type-expressions.ll @@ -19,7 +19,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/ranged_parameter.ll =================================================================== --- test/ScopInfo/ranged_parameter.ll +++ test/ScopInfo/ranged_parameter.ll @@ -4,7 +4,11 @@ ; range metadata (see bottom of the file) are present: ; ; CHECK: Context: -; CHECK: [p_0] -> { : p_0 >= 0 and p_0 <= 255 } +; CHECK: [p_0] -> { : +; CHECK-DAG: p_0 >= 0 +; CHECK-DAG: and +; CHECK-DAG: p_0 <= 255 +; CHECK: } ; ; void jd(int *A, int *p /* in [0,256) */) { ; for (int i = 0; i < 1024; i++) Index: test/ScopInfo/simple_loop_1.ll =================================================================== --- test/ScopInfo/simple_loop_1.ll +++ test/ScopInfo/simple_loop_1.ll @@ -14,7 +14,7 @@ bb: ; preds = %bb, %entry %i = phi i64 [ 0, %entry ], [ %i.inc, %bb ] - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep %i.inc = add nsw i64 %i, 1 %exitcond = icmp eq i64 %i.inc, %N Index: test/ScopInfo/test-wrapping-in-condition.ll =================================================================== --- /dev/null +++ test/ScopInfo/test-wrapping-in-condition.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 128 } +; +; #include +; #include +; +; void __attribute__((noinline)) foo(float *A, long N) { +; for (long i = 0; i < N; i++) +; if ((signed char)i < 100) +; A[i] += i; +; } +define void @foo(float* %A, i64 %N) { +bb: + br label %bb1 + +bb1: ; preds = %bb11, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] + %tmp = icmp slt i64 %i.0, %N + br i1 %tmp, label %bb2, label %bb13 + +bb2: ; preds = %bb1 + %tmp3 = trunc i64 %i.0 to i8 + %tmp4 = icmp slt i8 %tmp3, 100 + br i1 %tmp4, label %bb5, label %bb10 + +bb5: ; preds = %bb2 + %tmp6 = sitofp i64 %i.0 to float + %tmp7 = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp8 = load float, float* %tmp7, align 4 + %tmp9 = fadd float %tmp8, %tmp6 + store float %tmp9, float* %tmp7, align 4 + br label %bb10 + +bb10: ; preds = %bb5, %bb2 + br label %bb11 + +bb11: ; preds = %bb10 + %tmp12 = add nuw nsw i64 %i.0, 1 + br label %bb1 + +bb13: ; preds = %bb1 + ret void +} Index: test/ScopInfo/unsigned-condition.ll =================================================================== --- test/ScopInfo/unsigned-condition.ll +++ test/ScopInfo/unsigned-condition.ll @@ -19,7 +19,7 @@ br i1 %brcond, label %store, label %bb.backedge store: - %scevgep = getelementptr i64, i64* %a, i64 %i + %scevgep = getelementptr inbounds i64, i64* %a, i64 %i store i64 %i, i64* %scevgep br label %bb.backedge Index: test/ScopInfo/wraping_signed_expr_0.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_0.ll @@ -0,0 +1,71 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, char N, char p) { +; for (char i = 0; i < N; i++) { +; A[i + 3] = 0; +; } +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; CHECK: Function: wrap +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 125 } +; +; +; FIXME: This is a negative test as nowrap should not need an assumed context. +; However %tmp5 in @nowrap is translated to the SCEV <3,+,1><%bb2> +; which lacks the flags we would need to avoid runtime checks. +; +; CHECK: Function: nowrap +; CHECK: Boundary Context: +; CHECK-NOT: [N] -> { : } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i8 %indvars.iv, 3 + %tmp6 = getelementptr i32, i32* %A, i8 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i32* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i8 %indvars.iv, 3 + %tmp6 = getelementptr inbounds i32, i32* %A, i8 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_1.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(long *A, long N, long p) { +; for (long i = 0; i < N; i++) +; A[i + 1] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; Note: +; 1152921504606846975 * sizeof(long) <= 2 ^ 63 - 1 +; and +; 1152921504606846976 * sizeof(long) > 2 ^ 63 - 1 +; with +; sizeof(long) == 8 +; +; CHECK: Function: wrap +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 1152921504606846975 } +; +; CHECK: Function: nowrap +; CHECK: Boundary Context: +; CHECK: [N] -> { : } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} + +define void @nowrap(i64* %A, i64 %N, i64 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i64 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add nsw nuw i64 %indvars.iv, 1 + %tmp6 = getelementptr inbounds i64, i64* %A, i64 %tmp5 + store i64 0, i64* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_2.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + 30] = 0; +; } +; +; The wrap function has no inbounds GEP but the nowrap function has. Therefore, +; we will add the assumption that i+1 won't overflow only to the former. +; +; Note: 2147483618 + 30 == 2 ^ 31 +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N] -> { : N <= 2147483647 and N >= -2147483648 } +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 2147483618 } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i32 %indvars.iv, 30 + %tmp6 = getelementptr i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_3.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_3.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(int *A, int N, int p) { +; for (int i = 0; i < N; i++) +; A[i + p] = 0; +; } +; +; Note: 2147483648 == 2 ^ 31 +; +; CHECK: Function: wrap +; CHECK: Boundary Context: +; CHECK: [N, p] -> { : p <= 2147483648 - N } +; +target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i32* %A, i32 %N, i32 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i32 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i32 %indvars.iv, %p + %tmp6 = getelementptr inbounds i32, i32* %A, i32 %tmp5 + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_4.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_4.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; void f(char *A, char N, char p) { +; for (char i = 0; i < N; i++) +; A[p-1] = 0; +; } +; +; CHECK: Function: wrap +; CHECK: Context: +; CHECK: [N, p] -> { : N <= 127 and N >= -128 and p <= 127 and p >= -128 } +; CHECK: Boundary Context: +; CHECK: [N, p] -> { : p >= -127 } +; +target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128" + +define void @wrap(i8* %A, i8 %N, i8 %p) { +bb: + br label %bb2 + +bb2: ; preds = %bb7, %bb + %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ] + %tmp3 = icmp slt i8 %indvars.iv, %N + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb2 + %tmp5 = add i8 %p, -1 + %tmp6 = getelementptr i8, i8* %A, i8 %tmp5 + store i8 0, i8* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb4 + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %bb2 + +bb8: ; preds = %bb2 + ret void +} Index: test/ScopInfo/wraping_signed_expr_5.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_5.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; We should not generate runtime check for ((int)r1 + (int)r2) as it is known not +; to overflow. However (p + q) can, thus checks are needed. +; +; CHECK: Boundary Context: +; CHECK: [r1, r2, q, p] -> { +; CHECK-DAG: p <= 2147483647 - q +; CHECK-DAG: and +; CHECK-DAG: p >= -2147483648 - q +; CHECK: } +; +; void wraps(int *A, int p, short q, char r1, char r2) { +; for (char i = r1; i < r2; i++) +; A[p + q] = A[(int)r1 + (int)r2]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @wraps(i32* %A, i32 %p, i16 signext %q, i8 signext %r1, i8 signext %r2) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i8 [ %r1, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i8 %i.0, %r2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv3 = sext i8 %r1 to i64 + %conv4 = sext i8 %r2 to i64 + %add = add nsw i64 %conv3, %conv4 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add + %tmp = load i32, i32* %arrayidx, align 4 + %conv5 = sext i16 %q to i32 + %add6 = add nsw i32 %conv5, %p + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %idxprom7 + store i32 %tmp, i32* %arrayidx8, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i8 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/wraping_signed_expr_6.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_6.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-scops -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 128 } +; +; void foo(float *A, long N) { +; for (long i = 0; i < N; i++) +; if ((signed char)i < 100) +; A[i] += i; +; } +define void @foo(float* %A, i64 %N) { +bb: + br label %bb1 + +bb1: ; preds = %bb11, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] + %tmp = icmp slt i64 %i.0, %N + br i1 %tmp, label %bb2, label %bb13 + +bb2: ; preds = %bb1 + %tmp3 = trunc i64 %i.0 to i8 + %tmp4 = icmp slt i8 %tmp3, 100 + br i1 %tmp4, label %bb5, label %bb10 + +bb5: ; preds = %bb2 + %tmp6 = sitofp i64 %i.0 to float + %tmp7 = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp8 = load float, float* %tmp7, align 4 + %tmp9 = fadd float %tmp8, %tmp6 + store float %tmp9, float* %tmp7, align 4 + br label %bb10 + +bb10: ; preds = %bb5, %bb2 + br label %bb11 + +bb11: ; preds = %bb10 + %tmp12 = add nuw nsw i64 %i.0, 1 + br label %bb1 + +bb13: ; preds = %bb1 + ret void +} Index: test/ScopInfo/wraping_signed_expr_7.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_7.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-scops -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: Boundary Context: +; CHECK: [N] -> { : N <= 128 } +; +; void foo(float *A, long N) { +; for (long i = 0; i < N;) +; if ((signed char)i++ < 100) +; A[i] += i; +; } +define void @foo(float* %A, i64 %N) { +bb: + br label %bb1 + +bb1: ; preds = %bb11, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] + %tmp = icmp slt i64 %i.0, %N + br i1 %tmp, label %bb2, label %bb13 + +bb2: ; preds = %bb1 + %tmp12 = add nuw nsw i64 %i.0, 1 + %tmp3 = trunc i64 %i.0 to i8 + %tmp4 = icmp slt i8 %tmp3, 100 + br i1 %tmp4, label %bb5, label %bb10 + +bb5: ; preds = %bb2 + %tmp6 = sitofp i64 %i.0 to float + %tmp7 = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp8 = load float, float* %tmp7, align 4 + %tmp9 = fadd float %tmp8, %tmp6 + store float %tmp9, float* %tmp7, align 4 + br label %bb10 + +bb10: ; preds = %bb5, %bb2 + br label %bb11 + +bb11: ; preds = %bb10 + br label %bb1 + +bb13: ; preds = %bb1 + ret void +} Index: test/ScopInfo/wraping_signed_expr_slow_1.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_slow_1.ll @@ -0,0 +1,81 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; This checks that the no-wraps checks will be computed fast as some example +; already showed huge slowdowns even though the inbounds and nsw flags were +; all in place. +; +; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark. +; void fast(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; short index0 = (short)(i + N); +; #ifdef fast +; short index1 = (index0 * 1) + (short)M; +; #else +; short index1 = (index0 * 16) + (short)M; +; #endif +; A[index1]++; +; } +; } +; +; CHECK: Function: fast +; CHECK: Function: slow +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @fast(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %tmp3ext = sext i8 %tmp3 to i16 + ;%mul = mul nsw i16 %tmp3ext, 16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %tmp3ext, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @slow(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %tmp3ext = sext i8 %tmp3 to i16 + %mul = mul nsw i16 %tmp3ext, 16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %mul, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/wraping_signed_expr_slow_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/wraping_signed_expr_slow_2.ll @@ -0,0 +1,86 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; This checks that the no-wraps checks will be computed fast as some example +; already showed huge slowdowns even though the inbounds and nsw flags were +; all in place. +; +; // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark. +; void fast(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; char index0 = i + N; +; char index1 = index0 * 16; +; char index2 = index1 + M; +; A[(short)index2]++; +; } +; } +; +; void slow(char *A, char N, char M) { +; for (char i = 0; i < 8; i++) { +; char index0 = i + N; +; char index1 = index0 * 16; +; short index2 = ((short)index1) + ((short)M); +; A[index2]++; +; } +; } +; +; CHECK: Function: fast +; CHECK: Function: slow +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @fast(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %mul = mul nsw i8 %tmp3, 16 + %add2 = add nsw i8 %mul, %M + %add2ext = sext i8 %add2 to i16 + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2ext + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +define void @slow(i8* %A, i8 %N, i8 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i8 %indvars.iv, 8 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp3 = add nsw i8 %indvars.iv, %N + %mul = mul nsw i8 %tmp3, 16 + %mulext = sext i8 %mul to i16 + %Mext = sext i8 %M to i16 + %add2 = add nsw i16 %mulext, %Mext + %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2 + %tmp4 = load i8, i8* %arrayidx, align 4 + %inc = add nsw i8 %tmp4, 1 + store i8 %inc, i8* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}