Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -814,6 +814,17 @@
   /// this scop and that need to be code generated as a run-time test.
   isl_set *AssumedContext;
 
+  /// @brief The boundary assumptions under which this scop was built.
+  ///
+  /// The boundary context is similar to the assumed context as it contains
+  /// constraints over the parameters we assume to be true. However, the
+  /// boundary context is less useful for dependence analysis and
+  /// simplification purposes as it contains only constraints that affect the
+  /// boundaries of the parameter ranges. As these constraints can become quite
+  /// complex, the boundary context and the assumed context are separated as a
+  /// meassure to save compile time.
+  isl_set *BoundaryContext;
+
   /// @brief The schedule of the SCoP
   ///
   /// The schedule of the SCoP describes the execution order of the statements
@@ -894,8 +905,12 @@
   /// @brief Add the bounds of the parameters to the context.
   void addParameterBounds();
 
-  /// @brief Simplify the assumed context.
-  void simplifyAssumedContext();
+  /// @brief Simplify an assumption context with the context and the domain.
+  __isl_give isl_set *
+  simplifyAssumptionContext(__isl_take isl_set *AssumptionContext) const;
+
+  /// @brief Simplify the assumed and boundary context.
+  void simplifyContexts();
 
   /// @brief Create a new SCoP statement for either @p BB or @p R.
   ///
@@ -1049,6 +1064,21 @@
   ///            to hold.
   void addAssumption(__isl_take isl_set *Set);
 
+  /// @brief Get the boundary context for this Scop.
+  ///
+  /// @return The boundary context of this Scop.
+  __isl_give isl_set *getBoundaryContext() const;
+
+  /// @brief Get the runtime context for this Scop.
+  ///
+  /// The runtime context is the intersection of the assumed and the boundary
+  /// context. It is used in the AST generation to build the runtime checks that
+  /// are used to decide if the optimized or original version of the SCoP will
+  /// be exectued.
+  ///
+  /// @return The runtime context of this Scop.
+  __isl_give isl_set *getRuntimeContext() const;
+
   /// @brief Build all alias groups for this SCoP.
   ///
   /// @returns True if __no__ error occurred, false otherwise.
@@ -1065,6 +1095,9 @@
   /// @brief Get an isl string representing the assumed context.
   std::string getAssumedContextStr() const;
 
+  /// @brief Get an isl string representing the boundary context.
+  std::string getBoundaryContextStr() const;
+
   /// @brief Return the stmt for the given @p BB or nullptr if none.
   ScopStmt *getStmtForBasicBlock(BasicBlock *BB) const;
 
Index: include/polly/Support/SCEVAffinator.h
===================================================================
--- include/polly/Support/SCEVAffinator.h
+++ include/polly/Support/SCEVAffinator.h
@@ -34,6 +34,7 @@
 
 namespace llvm {
 class Region;
+class DataLayout;
 class ScalarEvolution;
 }
 
@@ -57,6 +58,14 @@
   __isl_give isl_pw_aff *getPwAff(const llvm::SCEV *E,
                                   const ScopStmt *Stmt = nullptr);
 
+  /// @brief Compute the context in which integer wrapping is happending.
+  ///
+  ///        This context contains all parameter configurations for which we
+  ///        know that the wrapping and non-wrapping expressions are different.
+  ///
+  /// @returns The context in which integer wrapping is happening.
+  __isl_give isl_set *getWrappingContext() const;
+
 private:
   /// @brief Key to identify cached expressions.
   using CacheKey = std::pair<const llvm::SCEV *, const ScopStmt *>;
@@ -71,6 +80,26 @@
   llvm::ScalarEvolution &SE;
   const ScopStmt *Stmt;
 
+  /// @brief Target data for element size computing.
+  const llvm::DataLayout &TD;
+
+  /// @brief Compute the non-wrapping version of @p PWA for type @p ExprType.
+
+  /// @param PWA  The piece-wise affine function that might wrap.
+  /// @param Type The type of the SCEV that was translated to @p PWA.
+  ///
+  /// @returns The expr @p PWA modulo the size constraints of @p ExprType.
+  __isl_give isl_pw_aff *addModuloSemantic(__isl_take isl_pw_aff *PWA,
+                                           llvm::Type *ExprType) const;
+
+  /// @brief Compute the domain in which integer wrapping for @p PWA happens.
+  ///
+  /// @returns The domain in which integer wrapping happens or nullptr if empty.
+  __isl_give isl_set *getWrappingDomain(llvm::SCEV::NoWrapFlags Flags,
+                                        llvm::Type *ExprType,
+                                        __isl_keep isl_pw_aff *PWA,
+                                        const ScopStmt *Stmt) const;
+
   int getLoopDepth(const llvm::Loop *L);
 
   __isl_give isl_pw_aff *visit(const llvm::SCEV *E);
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -1162,7 +1162,15 @@
     Stmt.realignParams();
 }
 
-void Scop::simplifyAssumedContext() {
+__isl_give isl_set *
+Scop::simplifyAssumptionContext(__isl_take isl_set *AssumptionContext) const {
+  AssumptionContext = isl_set_gist_params(AssumptionContext,
+                                          isl_union_set_params(getDomains()));
+  AssumptionContext = isl_set_gist_params(AssumptionContext, getContext());
+  return AssumptionContext;
+}
+
+void Scop::simplifyContexts() {
   // The parameter constraints of the iteration domains give us a set of
   // constraints that need to hold for all cases where at least a single
   // statement iteration is executed in the whole scop. We now simplify the
@@ -1191,9 +1199,8 @@
   //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
   //   otherwise we would access out of bound data. Now, knowing that code is
   //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
-  AssumedContext =
-      isl_set_gist_params(AssumedContext, isl_union_set_params(getDomains()));
-  AssumedContext = isl_set_gist_params(AssumedContext, getContext());
+  AssumedContext = simplifyAssumptionContext(AssumedContext);
+  BoundaryContext = simplifyAssumptionContext(BoundaryContext);
 }
 
 /// @brief Add the minimal/maximal access in @p Set to @p User.
@@ -1464,7 +1471,8 @@
 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, isl_ctx *Context,
            unsigned MaxLoopDepth)
     : SE(&ScalarEvolution), R(R), IsOptimized(false),
-      MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this) {}
+      MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this),
+      BoundaryContext(nullptr) {}
 
 void Scop::initFromTempScop(TempScop &TempScop, LoopInfo &LI,
                             ScopDetection &SD) {
@@ -1480,7 +1488,11 @@
 
   realignParams();
   addParameterBounds();
-  simplifyAssumedContext();
+
+  BoundaryContext = Affinator.getWrappingContext();
+  BoundaryContext = isl_set_intersect_params(BoundaryContext, getContext());
+  BoundaryContext = isl_set_complement(BoundaryContext);
+  simplifyContexts();
 
   assert(NestLoops.empty() && "NestLoops not empty at top level!");
 }
@@ -1498,6 +1510,7 @@
 Scop::~Scop() {
   isl_set_free(Context);
   isl_set_free(AssumedContext);
+  isl_set_free(BoundaryContext);
   isl_schedule_free(Schedule);
 
   // Free the alias groups
@@ -1534,6 +1547,9 @@
 std::string Scop::getAssumedContextStr() const {
   return stringFromIslObj(AssumedContext);
 }
+std::string Scop::getBoundaryContextStr() const {
+  return stringFromIslObj(BoundaryContext);
+}
 
 std::string Scop::getNameStr() const {
   std::string ExitName, EntryName;
@@ -1566,6 +1582,17 @@
   AssumedContext = isl_set_coalesce(AssumedContext);
 }
 
+__isl_give isl_set *Scop::getBoundaryContext() const {
+  return isl_set_copy(BoundaryContext);
+}
+
+__isl_give isl_set *Scop::getRuntimeContext() const {
+  isl_set *RTContext = getAssumedContext();
+  RTContext = isl_set_intersect(RTContext, getBoundaryContext());
+  RTContext = simplifyAssumptionContext(RTContext);
+  return RTContext;
+}
+
 void Scop::printContext(raw_ostream &OS) const {
   OS << "Context:\n";
 
@@ -1584,6 +1611,14 @@
 
   OS.indent(4) << getAssumedContextStr() << "\n";
 
+  OS.indent(4) << "Boundary Context:\n";
+  if (!BoundaryContext) {
+    OS.indent(4) << "n/a\n\n";
+    return;
+  }
+
+  OS.indent(4) << getBoundaryContextStr() << "\n";
+
   for (const SCEV *Parameter : Parameters) {
     int Dim = ParameterIds.find(Parameter)->second;
     OS.indent(4) << "p" << Dim << ": " << *Parameter << "\n";
Index: lib/CodeGen/IslAst.cpp
===================================================================
--- lib/CodeGen/IslAst.cpp
+++ lib/CodeGen/IslAst.cpp
@@ -327,9 +327,9 @@
 
 void IslAst::buildRunCondition(__isl_keep isl_ast_build *Build) {
   // The conditions that need to be checked at run-time for this scop are
-  // available as an isl_set in the AssumedContext from which we can directly
+  // available as an isl_set in the runtime context from which we can directly
   // derive a run-time condition.
-  RunCondition = isl_ast_build_expr_from_set(Build, S->getAssumedContext());
+  RunCondition = isl_ast_build_expr_from_set(Build, S->getRuntimeContext());
 
   // Create the alias checks from the minimal/maximal accesses in each alias
   // group which consists of read only and non read only (read write) accesses.
Index: lib/Support/SCEVAffinator.cpp
===================================================================
--- lib/Support/SCEVAffinator.cpp
+++ lib/Support/SCEVAffinator.cpp
@@ -18,16 +18,20 @@
 #include "polly/Support/ScopHelper.h"
 #include "polly/Support/SCEVValidator.h"
 
+#include "llvm/IR/Module.h"
+
 #include "isl/aff.h"
 #include "isl/set.h"
 #include "isl/val.h"
+#include "isl/union_set.h"
 #include "isl/local_space.h"
 
 using namespace llvm;
 using namespace polly;
 
 SCEVAffinator::SCEVAffinator(Scop *S)
-    : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()) {}
+    : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()),
+      TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {}
 
 SCEVAffinator::~SCEVAffinator() {
   for (const auto &CachedPair : CachedExpressions)
@@ -48,6 +52,94 @@
   return visit(Expr);
 }
 
+__isl_give isl_set *
+SCEVAffinator::getWrappingDomain(SCEV::NoWrapFlags Flags, Type *ExprType,
+                                 __isl_take isl_pw_aff *PWA,
+                                 const ScopStmt *Stmt) const {
+  // If the SCEV flags do contain NSW (no signed wrap) then PWA already
+  // represents Expr in modulo semantic (it is not allowed to overflow), thus we
+  // are done. Otherwise, we will compute:
+  //   PWA = ((PWA + 2^(n-1)) mod (2 ^ n)) - 2^(n-1)
+  // whereas n is the number of bits of the Expr, hence:
+  //   n = bitwidth(ExprType)
+
+  if (Flags & SCEV::FlagNSW)
+    return nullptr;
+
+  isl_pw_aff *PWAMod = addModuloSemantic(isl_pw_aff_copy(PWA), ExprType);
+  if (isl_pw_aff_is_equal(PWA, PWAMod)) {
+    isl_pw_aff_free(PWAMod);
+    return nullptr;
+  }
+
+  PWA = isl_pw_aff_copy(PWA);
+
+  if (Stmt) {
+    isl_set *Domain = isl_set_reset_tuple_id(Stmt->getDomain());
+    PWA = isl_pw_aff_intersect_domain(PWA, isl_set_copy(Domain));
+    PWAMod = isl_pw_aff_intersect_domain(PWAMod, Domain);
+  }
+
+  return isl_pw_aff_ne_set(PWA, PWAMod);
+}
+
+__isl_give isl_set *SCEVAffinator::getWrappingContext() const {
+
+  isl_union_set *WrappingDomain = isl_union_set_empty(S->getParamSpace());
+
+  for (const auto &CachedPair : CachedExpressions) {
+    const SCEV *Expr = CachedPair.first.first;
+    SCEV::NoWrapFlags Flags;
+
+    switch (Expr->getSCEVType()) {
+    case scAddExpr:
+      Flags = cast<SCEVAddExpr>(Expr)->getNoWrapFlags();
+      break;
+    case scMulExpr:
+      Flags = cast<SCEVMulExpr>(Expr)->getNoWrapFlags();
+      break;
+    case scAddRecExpr:
+      Flags = cast<SCEVAddRecExpr>(Expr)->getNoWrapFlags();
+      break;
+    default:
+      continue;
+    }
+
+    const ScopStmt *Stmt = CachedPair.first.second;
+    isl_pw_aff *PWA = CachedPair.second;
+
+    isl_set *WDom = getWrappingDomain(Flags, Expr->getType(), PWA, Stmt);
+    if (!WDom)
+      continue;
+
+    WrappingDomain = isl_union_set_add_set(WrappingDomain, WDom);
+  }
+
+  return isl_set_coalesce(isl_union_set_params(WrappingDomain));
+}
+
+__isl_give isl_pw_aff *SCEVAffinator::addModuloSemantic(isl_pw_aff *PWA,
+                                                        Type *ExprType) const {
+  unsigned Width = TD.getTypeStoreSizeInBits(ExprType);
+  isl_ctx *Ctx = isl_pw_aff_get_ctx(PWA);
+
+  isl_val *ModVal = isl_val_int_from_ui(Ctx, Width);
+  ModVal = isl_val_2exp(ModVal);
+
+  isl_val *AddVal = isl_val_int_from_ui(Ctx, Width - 1);
+  AddVal = isl_val_2exp(AddVal);
+
+  isl_set *Domain = isl_pw_aff_domain(isl_pw_aff_copy(PWA));
+
+  isl_pw_aff *AddPW = isl_pw_aff_val_on_domain(Domain, AddVal);
+
+  PWA = isl_pw_aff_add(PWA, isl_pw_aff_copy(AddPW));
+  PWA = isl_pw_aff_mod_val(PWA, ModVal);
+  PWA = isl_pw_aff_sub(PWA, AddPW);
+
+  return PWA;
+}
+
 __isl_give isl_pw_aff *SCEVAffinator::visit(const SCEV *Expr) {
 
   auto Key = std::make_pair(Expr, Stmt);
@@ -74,6 +166,11 @@
   }
 
   PWA = SCEVVisitor<SCEVAffinator, isl_pw_aff *>::visit(Expr);
+
+  // For compile time reasons we need to simplify the PWA before we cache and
+  // return it.
+  PWA = isl_pw_aff_coalesce(PWA);
+
   CachedExpressions[Key] = PWA;
   return isl_pw_aff_copy(PWA);
 }
@@ -124,8 +221,6 @@
     Sum = isl_pw_aff_add(Sum, NextSummand);
   }
 
-  // TODO: Check for NSW and NUW.
-
   return Sum;
 }
 
@@ -166,7 +261,6 @@
         isl_aff_zero_on_domain(LocalSpace), isl_dim_in, loopDimension, 1);
     isl_pw_aff *LPwAff = isl_pw_aff_from_aff(LAff);
 
-    // TODO: Do we need to check for NSW and NUW?
     return isl_pw_aff_mul(Step, LPwAff);
   }
 
Index: test/DependenceInfo/sequential_loops.ll
===================================================================
--- test/DependenceInfo/sequential_loops.ll
+++ test/DependenceInfo/sequential_loops.ll
@@ -272,7 +272,11 @@
 ; VALUE:   RAW dependences:
 ; VALUE:     [p] -> {
 ; VALUE:       Stmt_S1[i0] -> Stmt_S2[-p + i0] :
-; VALUE:           p <= 190 and i0 >= p and i0 <= 9 + p and i0 >= 0 and i0 <= 99
+; VALUE-DAG:           p <= 190
+; VALUE-DAG:           i0 >= p
+; VALUE-DAG:           i0 <= 9 + p
+; VALUE-DAG:           i0 <= 99
+; VALUE-DAG:           i0 >= 0
 ; VALUE:     }
 ; VALUE:   WAR dependences:
 ; VALUE:     [p] -> {
Index: test/Isl/Ast/aliasing_parametric_simple_2.ll
===================================================================
--- test/Isl/Ast/aliasing_parametric_simple_2.ll
+++ test/Isl/Ast/aliasing_parametric_simple_2.ll
@@ -5,7 +5,7 @@
 ;        A[i] = B[c - 10] + B[5];
 ;    }
 ;
-; CHECK: if (1 && (&MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0] || &MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10]))
+; CHECK: if (c >= -2147483638 && (&MemRef_B[c <= 15 ? 6 : c - 9] <= &MemRef_A[0] || &MemRef_A[1024] <= &MemRef_B[c >= 15 ? 5 : c - 10]))
 ; CHECK:     for (int c0 = 0; c0 <= 1023; c0 += 1)
 ; CHECK:       Stmt_for_body(c0);
 ; CHECK: else
Index: test/Isl/Ast/simple-run-time-condition.ll
===================================================================
--- test/Isl/Ast/simple-run-time-condition.ll
+++ test/Isl/Ast/simple-run-time-condition.ll
@@ -18,9 +18,9 @@
 ; cause any code to be executed are not generated.
 
 ; CHECK: if (
-; CHECK: (o >= 1 && q <= 0 && m + q >= 0)
-; CHECK: ||
-; CHECK; (o <= 0 && m + q >= 100 && q <= 100)
+; CHECK-DAG: (o >= 1 && n + p <= 9223372036854775808 && q <= 0 && m + q >= 0)
+; CHECK-DAG: ||
+; CHECK-DAG: (o <= 0 && n + p <= 9223372036854775808 && m + q >= 100 && q <= 100)
 ; CHECK: )
 
 ; CHECK:     if (o >= 1) {
Index: test/Isl/CodeGen/aliasing_parametric_simple_2.ll
===================================================================
--- test/Isl/CodeGen/aliasing_parametric_simple_2.ll
+++ test/Isl/CodeGen/aliasing_parametric_simple_2.ll
@@ -5,6 +5,8 @@
 ;        A[i] = B[c - 10] + B[5];
 ;    }
 ;
+; CHECK:  %[[OV0:[._a-zA-Z0-9]*]] = sext i32 %c to i64
+; CHECK:  %[[OV1:[._a-zA-Z0-9]*]] = icmp sge i64 %[[OV0]], -2147483638
 ; CHECK:  %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64
 ; CHECK:  %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15
 ; CHECK:  %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64
@@ -22,7 +24,7 @@
 ; CHECK:  %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]]
 ; CHECK:  %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]]
 ; CHECK:  %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]]
-; CHECK:  %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]]
+; CHECK:  %[[RTC:[._a-zA-Z0-9]*]] = and i1 %[[OV1]], %[[NoAlias]]
 ; CHECK:  br i1 %[[RTC]], label %polly.start, label %for.cond
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: test/Isl/CodeGen/pointer-type-expressions-2.ll
===================================================================
--- test/Isl/CodeGen/pointer-type-expressions-2.ll
+++ test/Isl/CodeGen/pointer-type-expressions-2.ll
@@ -23,7 +23,7 @@
 
 ; Check that we transform this into a pointer difference.
 
-; CODEGEN: %0 = ptrtoint i8* %end to i64
-; CODEGEN: %1 = ptrtoint i8* %start to i64
-; CODEGEN: %2 = sub i64 %0, %1
+; CODEGEN: %[[r0:[._a-zA-Z0-9]]] = ptrtoint i8* %end to i64
+; CODEGEN: %[[r1:[._a-zA-Z0-9]]] = ptrtoint i8* %start to i64
+; CODEGEN: %[[r2:[._a-zA-Z0-9]]] = sub i64 %[[r0]], %[[r1]]
 
Index: test/Isl/CodeGen/pointer-type-expressions.ll
===================================================================
--- test/Isl/CodeGen/pointer-type-expressions.ll
+++ test/Isl/CodeGen/pointer-type-expressions.ll
@@ -41,6 +41,7 @@
 ; CHECK:     Stmt_store(c0);
 ; CHECK: }
 
-; CODEGEN:   %0 = bitcast float* %P to i8*
-; CODEGEN:   %1 = icmp ule i8* %0, inttoptr (i64 -1 to i8*)
+; CODEGEN:   %[[R0:[0-9]*]] = bitcast float* %P to i8*
+; CODEGEN:   %[[R1:[0-9]*]] = bitcast float* %P to i8*
+; CODEGEN-NEXT:   icmp ule i8* %[[R1]], inttoptr (i64 -1 to i8*)
 
Index: test/Isl/CodeGen/two-scops-in-row.ll
===================================================================
--- test/Isl/CodeGen/two-scops-in-row.ll
+++ test/Isl/CodeGen/two-scops-in-row.ll
@@ -3,7 +3,7 @@
 ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -polly-ignore-aliasing -disable-output < %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; SCALAR: if (1)
+; SCALAR: if (Scalar0.val >= -2147483548 && Scalar0.val <= 2147483646)
 ; SCALAR:     {
 ; SCALAR:       for (int c0 = 0; c0 <= -Scalar0.val + 99; c0 += 1)
 ; SCALAR:         Stmt_for_1(c0);
Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll
===================================================================
--- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll
+++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll
@@ -12,7 +12,19 @@
 ; INNERMOST:    Region: %bb15---%bb26
 ; INNERMOST:    Max Loop Depth:  1
 ; INNERMOST:    Context:
-; INNERMOST:      [p_0, p_1, p_2] -> {  : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 }
+; INNERMOST:      [p_0, p_1, p_2] -> {  :
+; INNERMOST-DAG:    p_0 >= 0
+; INNERMOST-DAG:      and
+; INNERMOST-DAG:    p_0 <= 2147483647
+; INNERMOST-DAG:      and
+; INNERMOST-DAG:    p_1 >= 0
+; INNERMOST-DAG:      and
+; INNERMOST-DAG:     p_1 <= 4096
+; INNERMOST-DAG:      and
+; INNERMOST-DAG:    p_2 >= 0
+; INNERMOST-DAG:      and
+; INNERMOST-DAG:     p_2 <= 4096
+; INNERMOST:                         }
 ; INNERMOST:    Assumed Context:
 ; INNERMOST:      [p_0, p_1, p_2] -> {  :  }
 ; INNERMOST:    p0: {0,+,{0,+,1}<nuw><nsw><%bb11>}<nuw><nsw><%bb13>
Index: test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll
===================================================================
--- test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll
+++ test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll
@@ -5,7 +5,11 @@
 ; INNERMOST:    Region: %bb9---%bb18
 ; INNERMOST:    Max Loop Depth:  1
 ; INNERMOST:    Context:
-; INNERMOST:    [p_0] -> {  : p_0 >= -2199023255552 and p_0 <= 2199023254528 }
+; INNERMOST:    [p_0] -> {  :
+; INNERMOST-DAG:   p_0 >= -2199023255552
+; INNERMOST-DAG:    and 
+; INNERMOST-DAG:   p_0 <= 2199023254528
+; INNERMOST:             }
 ; INNERMOST:    Assumed Context:
 ; INNERMOST:    [p_0] -> {  :  }
 ; INNERMOST:    p0: {0,+,(sext i32 %N to i64)}<%bb3>
Index: test/ScopInfo/assume_gep_bounds.ll
===================================================================
--- test/ScopInfo/assume_gep_bounds.ll
+++ test/ScopInfo/assume_gep_bounds.ll
@@ -20,6 +20,8 @@
 ; CHECK-DAG:                    p <= 30
 ; CHECK-DAG:                     and
 ; CHECK-DAG:                    m <= 20
+; CHECK-DAG:                     and
+; CHECK-DAG:                    p <= 2305843009213694582 - 600n - 30m
 ; CHECK:                   }
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: test/ScopInfo/assume_gep_bounds_2.ll
===================================================================
--- test/ScopInfo/assume_gep_bounds_2.ll
+++ test/ScopInfo/assume_gep_bounds_2.ll
@@ -16,10 +16,14 @@
 ; accessed. In this case the value of m does not matter.
 
 ; CHECK: Assumed Context:
-; CHECK-NEXT: [n, m, p] -> { :
-; CHECK-DAG:                   (n >= 1 and m <= 20 and p <= 20)
+; CHECK-NEXT: [n, m, p] -> {  : (n <= 0 and p <= 20) or (n >= 1 and m <= 20 and p <= 20) }
+; CHECK: Boundary Context:
+; CHECK-NEXT: [n, m, p] -> {  :
+; CHECK-DAG:                   (n <= 0 and p <= 0)
 ; CHECK-DAG:                    or
-; CHECK-DAG:                   (n <= 0 and p <= 20)
+; CHECK-DAG:                   (n >= 1 and m <= 2305843009213693972 - 20n and p <= 0)
+; CHECK-DAG:                    or
+; CHECK-DAG:                   (n >= 1 and m <= 2305843009213693972 - 20n and p <= 2305843009213693972 - 20m and p >= 1)
 ; CHECK:                   }
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: test/ScopInfo/loop_carry.ll
===================================================================
--- test/ScopInfo/loop_carry.ll
+++ test/ScopInfo/loop_carry.ll
@@ -45,8 +45,6 @@
   ret i64 0
 }
 
-; CHECK: Context:
-; CHECK: [n] -> { : }
 ; CHECK:     Statements {
 ; CHECK:     	Stmt_bb
 ; CHECK:             Domain :=
Index: test/ScopInfo/multidim_2d_outer_parametric_offset.ll
===================================================================
--- test/ScopInfo/multidim_2d_outer_parametric_offset.ll
+++ test/ScopInfo/multidim_2d_outer_parametric_offset.ll
@@ -10,7 +10,7 @@
 ; }
 
 ; CHECK:  Assumed Context:
-; CHECK:  [m, p] -> {  : }
+; CHECK:  [m, p] -> {  : p <= 9223372036854775708 }
 ; CHECK:  p0: %m
 ; CHECK:  p1: %p
 ; CHECK:  Statements {
Index: test/ScopInfo/pointer-type-expressions.ll
===================================================================
--- test/ScopInfo/pointer-type-expressions.ll
+++ test/ScopInfo/pointer-type-expressions.ll
@@ -19,7 +19,7 @@
   br i1 %brcond, label %store, label %bb.backedge
 
 store:
-  %scevgep = getelementptr i64, i64* %a, i64 %i
+  %scevgep = getelementptr inbounds i64, i64* %a, i64 %i
   store i64 %i, i64* %scevgep
   br label %bb.backedge
 
Index: test/ScopInfo/ranged_parameter.ll
===================================================================
--- test/ScopInfo/ranged_parameter.ll
+++ test/ScopInfo/ranged_parameter.ll
@@ -4,7 +4,11 @@
 ; range metadata (see bottom of the file) are present:
 ;
 ; CHECK: Context:
-; CHECK:   [p_0] -> {  : p_0 >= 0 and p_0 <= 255 }
+; CHECK:   [p_0] -> {  :
+; CHECK-DAG:             p_0 >= 0
+; CHECK-DAG:              and
+; CHECK-DAG:             p_0 <= 255
+; CHECK:            }
 ;
 ;    void jd(int *A, int *p /* in [0,256) */) {
 ;      for (int i = 0; i < 1024; i++)
Index: test/ScopInfo/simple_loop_1.ll
===================================================================
--- test/ScopInfo/simple_loop_1.ll
+++ test/ScopInfo/simple_loop_1.ll
@@ -14,7 +14,7 @@
 
 bb:                                               ; preds = %bb, %entry
   %i = phi i64 [ 0, %entry ], [ %i.inc, %bb ]
-  %scevgep = getelementptr i64, i64* %a, i64 %i
+  %scevgep = getelementptr inbounds i64, i64* %a, i64 %i
   store i64 %i, i64* %scevgep
   %i.inc = add nsw i64 %i, 1
   %exitcond = icmp eq i64 %i.inc, %N
Index: test/ScopInfo/unsigned-condition.ll
===================================================================
--- test/ScopInfo/unsigned-condition.ll
+++ test/ScopInfo/unsigned-condition.ll
@@ -19,7 +19,7 @@
   br i1 %brcond, label %store, label %bb.backedge
 
 store:
-  %scevgep = getelementptr i64, i64* %a, i64 %i
+  %scevgep = getelementptr inbounds i64, i64* %a, i64 %i
   store i64 %i, i64* %scevgep
   br label %bb.backedge
 
Index: test/ScopInfo/wraping_signed_expr_0.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_0.ll
@@ -0,0 +1,71 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, char N, char p) {
+;      for (char i = 0; i < N; i++) {
+;        A[i + 3] = 0;
+;      }
+;    }
+;
+; The wrap function has no inbounds GEP but the nowrap function has. Therefore,
+; we will add the assumption that i+1 won't overflow only to the former.
+;
+; CHECK:      Function: wrap
+; CHECK:      Boundary Context:
+; CHECK:      [N] -> {  : N <= 125 }
+;
+;
+; FIXME: This is a negative test as nowrap should not need an assumed context.
+;        However %tmp5 in @nowrap is translated to the SCEV <3,+,1><nw><%bb2>
+;        which lacks the <nsw> flags we would need to avoid runtime checks.
+;
+; CHECK:      Function: nowrap
+; CHECK:      Boundary Context:
+; CHECK-NOT:  [N] -> {  :  }
+;
+target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128"
+
+define void @wrap(i32* %A, i8 %N, i8 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i8 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add i8 %indvars.iv, 3
+  %tmp6 = getelementptr i32, i32* %A, i8 %tmp5
+  store i32 0, i32* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
+
+define void @nowrap(i32* %A, i8 %N, i8 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i8 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add nsw nuw i8 %indvars.iv, 3
+  %tmp6 = getelementptr inbounds i32, i32* %A, i8 %tmp5
+  store i32 0, i32* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nsw nuw i8 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_1.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_1.ll
@@ -0,0 +1,72 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(long *A, long N, long p) {
+;      for (long i = 0; i < N; i++)
+;        A[i + 1] = 0;
+;    }
+;
+; The wrap function has no inbounds GEP but the nowrap function has. Therefore,
+; we will add the assumption that i+1 won't overflow only to the former.
+;
+; Note:
+;       1152921504606846975 * sizeof(long) <= 2 ^ 63 - 1
+;  and
+;       1152921504606846976 * sizeof(long) >  2 ^ 63 - 1
+; with
+;       sizeof(long) == 8
+;
+; CHECK:      Function: wrap
+; CHECK:      Boundary Context:
+; CHECK:      [N] -> {  : N <= 1152921504606846975 }
+;
+; CHECK:      Function: nowrap
+; CHECK:      Boundary Context:
+; CHECK:      [N] -> {  :  }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @wrap(i64* %A, i64 %N, i64 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i64 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add nsw nuw i64 %indvars.iv, 1
+  %tmp6 = getelementptr i64, i64* %A, i64 %tmp5
+  store i64 0, i64* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
+
+define void @nowrap(i64* %A, i64 %N, i64 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i64 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i64 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add nsw nuw i64 %indvars.iv, 1
+  %tmp6 = getelementptr inbounds i64, i64* %A, i64 %tmp5
+  store i64 0, i64* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nsw nuw i64 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_2.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_2.ll
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, int N, int p) {
+;      for (int i = 0; i < N; i++)
+;        A[i + 30] = 0;
+;    }
+;
+; The wrap function has no inbounds GEP but the nowrap function has. Therefore,
+; we will add the assumption that i+1 won't overflow only to the former.
+;
+; Note: 2147483618 + 30 == 2 ^ 31
+;
+; CHECK:      Function: wrap
+; CHECK:      Context:
+; CHECK:        [N] -> {  : N <= 2147483647 and N >= -2147483648 }
+; CHECK:      Boundary Context:
+; CHECK:        [N] -> {  : N <= 2147483618 }
+;
+target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
+
+define void @wrap(i32* %A, i32 %N, i32 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i32 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add i32 %indvars.iv, 30
+  %tmp6 = getelementptr i32, i32* %A, i32 %tmp5
+  store i32 0, i32* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_3.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_3.ll
@@ -0,0 +1,37 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(int *A, int N, int p) {
+;      for (int i = 0; i < N; i++)
+;        A[i + p] = 0;
+;    }
+;
+; Note: 2147483648 == 2 ^ 31
+;
+; CHECK:      Function: wrap
+; CHECK:      Boundary Context:
+; CHECK:      [N, p] -> {  : p <= 2147483648 - N }
+;
+target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
+
+define void @wrap(i32* %A, i32 %N, i32 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i32 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i32 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add i32 %indvars.iv, %p
+  %tmp6 = getelementptr inbounds i32, i32* %A, i32 %tmp5
+  store i32 0, i32* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_4.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_4.ll
@@ -0,0 +1,37 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+;    void f(char *A, char N, char p) {
+;      for (char i = 0; i < N; i++)
+;        A[p-1] = 0;
+;    }
+;
+; CHECK:    Function: wrap
+; CHECK:    Context:
+; CHECK:        [N, p] -> {  : N <= 127 and N >= -128 and p <= 127 and p >= -128 }
+; CHECK:    Boundary Context:
+; CHECK:        [N, p] -> {  : p >= -127 }
+;
+target datalayout = "e-m:e-i8:64-f80:128-n8:16:32:64-S128"
+
+define void @wrap(i8* %A, i8 %N, i8 %p) {
+bb:
+  br label %bb2
+
+bb2:                                              ; preds = %bb7, %bb
+  %indvars.iv = phi i8 [ %indvars.iv.next, %bb7 ], [ 0, %bb ]
+  %tmp3 = icmp slt i8 %indvars.iv, %N
+  br i1 %tmp3, label %bb4, label %bb8
+
+bb4:                                              ; preds = %bb2
+  %tmp5 = add i8 %p, -1
+  %tmp6 = getelementptr i8, i8* %A, i8 %tmp5
+  store i8 0, i8* %tmp6, align 4
+  br label %bb7
+
+bb7:                                              ; preds = %bb4
+  %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
+  br label %bb2
+
+bb8:                                              ; preds = %bb2
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_5.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_5.ll
@@ -0,0 +1,48 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+; We should not generate runtime check for ((int)r1 + (int)r2) as it is known not
+; to overflow. However (p + q) can, thus checks are needed.
+;
+; CHECK:     Boundary Context:
+; CHECK:     [r1, r2, q, p] -> {
+; CHECK-DAG:      p <= 2147483647 - q
+; CHECK-DAG:        and
+; CHECK-DAG:      p >= -2147483648 - q
+; CHECK:     }
+;
+;    void wraps(int *A, int p, short q, char r1, char r2) {
+;      for (char i = r1; i < r2; i++)
+;        A[p + q] = A[(int)r1 + (int)r2];
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @wraps(i32* %A, i32 %p, i16 signext %q, i8 signext %r1, i8 signext %r2) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i8 [ %r1, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i8 %i.0, %r2
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %conv3 = sext i8 %r1 to i64
+  %conv4 = sext i8 %r2 to i64
+  %add = add nsw i64 %conv3, %conv4
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
+  %tmp = load i32, i32* %arrayidx, align 4
+  %conv5 = sext i16 %q to i32
+  %add6 = add nsw i32 %conv5, %p
+  %idxprom7 = sext i32 %add6 to i64
+  %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %idxprom7
+  store i32 %tmp, i32* %arrayidx8, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add i8 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_slow_1.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_slow_1.ll
@@ -0,0 +1,81 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+; This checks that the no-wraps checks will be computed fast as some example
+; already showed huge slowdowns even though the inbounds and nsw flags were
+; all in place.
+;
+;    // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark.
+;    void fast(char *A, char N, char M) {
+;      for (char i = 0; i < 8; i++) {
+;        short index0 = (short)(i + N);
+;        #ifdef fast
+;          short index1 = (index0 *  1) + (short)M;
+;        #else
+;          short index1 = (index0 * 16) + (short)M;
+;        #endif
+;        A[index1]++;
+;      }
+;    }
+;
+; CHECK: Function: fast
+; CHECK: Function: slow
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @fast(i8* %A, i8 %N, i8 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i8 %indvars.iv, 8
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp3 = add nsw i8 %indvars.iv, %N
+  %tmp3ext = sext i8 %tmp3 to i16
+  ;%mul = mul nsw i16 %tmp3ext, 16
+  %Mext = sext i8 %M to i16
+  %add2 = add nsw i16 %tmp3ext, %Mext
+  %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2
+  %tmp4 = load i8, i8* %arrayidx, align 4
+  %inc = add nsw i8 %tmp4, 1
+  store i8 %inc, i8* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @slow(i8* %A, i8 %N, i8 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i8 %indvars.iv, 8
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp3 = add nsw i8 %indvars.iv, %N
+  %tmp3ext = sext i8 %tmp3 to i16
+  %mul = mul nsw i16 %tmp3ext, 16
+  %Mext = sext i8 %M to i16
+  %add2 = add nsw i16 %mul, %Mext
+  %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2
+  %tmp4 = load i8, i8* %arrayidx, align 4
+  %inc = add nsw i8 %tmp4, 1
+  store i8 %inc, i8* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/wraping_signed_expr_slow_2.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/wraping_signed_expr_slow_2.ll
@@ -0,0 +1,86 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+;
+; This checks that the no-wraps checks will be computed fast as some example
+; already showed huge slowdowns even though the inbounds and nsw flags were
+; all in place.
+;
+;    // Inspired by itrans8x8 in transform8x8.c from the ldecode benchmark.
+;    void fast(char *A, char N, char M) {
+;      for (char i = 0; i < 8; i++) {
+;        char  index0 = i + N;
+;        char  index1 = index0 * 16;
+;        char  index2 = index1 + M;
+;        A[(short)index2]++;
+;      }
+;    }
+;
+;    void slow(char *A, char N, char M) {
+;      for (char i = 0; i < 8; i++) {
+;        char  index0 = i + N;
+;        char  index1 = index0 * 16;
+;        short index2 = ((short)index1) + ((short)M);
+;        A[index2]++;
+;      }
+;    }
+;
+; CHECK: Function: fast
+; CHECK: Function: slow
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @fast(i8* %A, i8 %N, i8 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i8 %indvars.iv, 8
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp3 = add nsw i8 %indvars.iv, %N
+  %mul = mul nsw i8 %tmp3, 16
+  %add2 = add nsw i8 %mul, %M
+  %add2ext = sext i8 %add2 to i16
+  %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2ext
+  %tmp4 = load i8, i8* %arrayidx, align 4
+  %inc = add nsw i8 %tmp4, 1
+  store i8 %inc, i8* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @slow(i8* %A, i8 %N, i8 %M) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i8 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i8 %indvars.iv, 8
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp3 = add nsw i8 %indvars.iv, %N
+  %mul = mul nsw i8 %tmp3, 16
+  %mulext = sext i8 %mul to i16
+  %Mext = sext i8 %M to i16
+  %add2 = add nsw i16 %mulext, %Mext
+  %arrayidx = getelementptr inbounds i8, i8* %A, i16 %add2
+  %tmp4 = load i8, i8* %arrayidx, align 4
+  %inc = add nsw i8 %tmp4, 1
+  store i8 %inc, i8* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i8 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}