Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -455,6 +455,38 @@
                                 llvm::SmallVectorImpl<MemoryAccess *> &Loads);
   //@}
 
+  /// @brief Derive assumptions about parameter values from GetElementPtrInst
+  ///
+  /// In case a GEP instruction references into a fixed size array e.g., an
+  /// access A[i][j] into an array A[100x100], LLVM-IR does not guarantee that
+  /// the subscripts always compute values that are within array bounds. In this
+  /// function we derive the set of parameter values for which all accesses are
+  /// within bounds and add the assumption that the scop is only every executed
+  /// with this set of parameter values.
+  ///
+  /// Example:
+  ///
+  ///   void foo(float A[10][20], long n, long m {
+  ///     for (long i = 0; i < n; i++)
+  ///       for (long j = 0; j < m; j++)
+  ///         A[i][j] = ...
+  ///
+  /// This loop yields out-of-bound accesses if m is at least 20 and at the same
+  /// time at least one iteration of the outer loop is executed. Hence, we
+  /// assume:
+  ///
+  ///   n <= 0 or m <= 20.
+  ///
+  /// TODO: The location where the GEP instruction is executed is not
+  /// necessarily the location where the memory is actually accessed. As a
+  /// result scanning for GEP[s] is imprecise. Even though this is not a
+  /// correctness problem, this imprecision may result in missed optimizations
+  /// or non-optimal run-time checks.
+  void deriveAssumptionsFromGEP(GetElementPtrInst *Inst);
+
+  /// @brief Scan the scop and derive assumptions about parameter values.
+  void deriveAssumptions();
+
   /// Create the ScopStmt from a BasicBlock.
   ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion,
            BasicBlock &bb, SmallVectorImpl<Loop *> &NestLoops,
Index: lib/Analysis/Dependences.cpp
===================================================================
--- lib/Analysis/Dependences.cpp
+++ lib/Analysis/Dependences.cpp
@@ -122,6 +122,9 @@
     }
     *StmtSchedule = isl_union_map_add_map(*StmtSchedule, Stmt->getScattering());
   }
+
+  *StmtSchedule =
+      isl_union_map_intersect_params(*StmtSchedule, S.getAssumedContext());
 }
 
 /// @brief Fix all dimension of @p Zero to 0 and add it to @p user
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -847,6 +847,60 @@
   return Domain;
 }
 
+void ScopStmt::deriveAssumptionsFromGEP(GetElementPtrInst *Inst) {
+  int Dimension = 0;
+  isl_ctx *Ctx = Parent.getIslCtx();
+  isl_local_space *LSpace = isl_local_space_from_space(getDomainSpace());
+  Type *Ty = Inst->getPointerOperandType();
+
+  if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
+    Dimension = 1;
+    Ty = PtrTy->getElementType();
+  }
+
+  while (auto ArrayTy = dyn_cast<ArrayType>(Ty)) {
+    unsigned int Operand = 1 + Dimension;
+
+    if (Inst->getNumOperands() <= Operand)
+      break;
+
+    const SCEV *Expr = Parent.getSE()->getSCEV(Inst->getOperand(1 + Dimension));
+
+    if (isAffineExpr(&Parent.getRegion(), Expr, *Parent.getSE())) {
+      isl_pw_aff *AccessOffset = SCEVAffinator::getPwAff(this, Expr);
+      AccessOffset =
+          isl_pw_aff_set_tuple_id(AccessOffset, isl_dim_in, getDomainId());
+
+      isl_pw_aff *DimSize = isl_pw_aff_from_aff(isl_aff_val_on_domain(
+          isl_local_space_copy(LSpace),
+          isl_val_int_from_si(Ctx, ArrayTy->getNumElements())));
+
+      isl_set *OutOfBound = isl_pw_aff_ge_set(AccessOffset, DimSize);
+      OutOfBound = isl_set_intersect(getDomain(), OutOfBound);
+      OutOfBound = isl_set_params(OutOfBound);
+      isl_set *InBound = isl_set_complement(OutOfBound);
+      isl_set *Executed = isl_set_params(getDomain());
+
+      // A => B == !A or B
+      isl_set *InBoundIfExecuted =
+          isl_set_union(isl_set_complement(Executed), InBound);
+
+      Parent.addAssumption(InBoundIfExecuted);
+    }
+
+    Dimension += 1;
+    Ty = ArrayTy->getElementType();
+  }
+
+  isl_local_space_free(LSpace);
+}
+
+void ScopStmt::deriveAssumptions() {
+  for (Instruction &Inst : *BB)
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(&Inst))
+      deriveAssumptionsFromGEP(GEP);
+}
+
 ScopStmt::ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion,
                    BasicBlock &bb, SmallVectorImpl<Loop *> &Nest,
                    SmallVectorImpl<unsigned> &Scatter)
@@ -867,6 +921,7 @@
   buildScattering(Scatter);
   buildAccesses(tempScop);
   checkForReductions();
+  deriveAssumptions();
 }
 
 /// @brief Collect loads which might form a reduction chain with @p StoreMA
@@ -1530,6 +1585,7 @@
 
 void Scop::addAssumption(__isl_take isl_set *Set) {
   AssumedContext = isl_set_intersect(AssumedContext, Set);
+  AssumedContext = isl_set_coalesce(AssumedContext);
 }
 
 void Scop::printContext(raw_ostream &OS) const {
Index: test/Dependences/sequential_loops.ll
===================================================================
--- test/Dependences/sequential_loops.ll
+++ test/Dependences/sequential_loops.ll
@@ -273,7 +273,7 @@
 ; VALUE:   RAW dependences:
 ; VALUE:     [p] -> {
 ; VALUE:       Stmt_S1[i0] -> Stmt_S2[-p + i0] :
-; VALUE:           i0 >= p and i0 <= 9 + p and i0 >= 0 and i0 <= 99
+; VALUE:           i0 >= p and i0 <= 9 + p and p <= 190 and i0 <= 99 and i0 >= 0
 ; VALUE:     }
 ; VALUE:   WAR dependences:
 ; VALUE:     [p] -> {
Index: test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll
===================================================================
--- test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll
+++ test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll
@@ -41,20 +41,9 @@
   ret void
 }
 
-; At the first look both loops seem parallel, however due to the linearization
-; of memory access functions, we get the following dependences:
-;    [n] -> { loop_body[i0, i1] -> loop_body[1024 + i0, -1 + i1]:
-;                                           0 <= i0 < n - 1024  and 1 <= i1 < n}
-; They cause the outer loop to be non-parallel.  We can only prove their
-; absence, if we know that n < 1024. This information is currently not available
-; to polly. However, we should be able to obtain it due to the out of bounds
-; memory accesses, that would happen if n >= 1024.
-
-; Note that we do not delinearize this access function because it is considered
-; to already be affine: {{0,+,4}<%loop.i>,+,4096}<%loop.j>.
-
-; CHECK: for (int c1 = 0; c1 < n; c1 += 1)
-; CHECK:   #pragma simd
+; CHECK: if (n <= 1024 ? 1 : 0)
 ; CHECK:   #pragma omp parallel for
-; CHECK:   for (int c3 = 0; c3 < n; c3 += 1)
-; CHECK:     Stmt_loop_body(c1, c3);
+; CHECK:   for (int c1 = 0; c1 < n; c1 += 1)
+; CHECK:     #pragma simd
+; CHECK:     for (int c3 = 0; c3 < n; c3 += 1)
+; CHECK:       Stmt_loop_body(c1, c3);
Index: test/Isl/Ast/alias_simple_1.ll
===================================================================
--- test/Isl/Ast/alias_simple_1.ll
+++ test/Isl/Ast/alias_simple_1.ll
@@ -12,11 +12,11 @@
 ;        A[i] = B[i];
 ;    }
 ;
-; NOAA: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; BASI: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; TBAA: if (1)
-; SCEV: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; GLOB: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; NOAA: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; BASI: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; TBAA: if (N <= 1024 ? 1 : 0)
+; SCEV: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; GLOB: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
Index: test/Isl/Ast/alias_simple_2.ll
===================================================================
--- test/Isl/Ast/alias_simple_2.ll
+++ test/Isl/Ast/alias_simple_2.ll
@@ -12,11 +12,11 @@
 ;        A[i] = B[i];
 ;    }
 ;
-; NOAA: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; BASI: if (1)
-; TBAA: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; SCEV: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; GLOB: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; NOAA: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; BASI: if (N <= 1024 ? 1 : 0)
+; TBAA: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; SCEV: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; GLOB: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
Index: test/Isl/Ast/alias_simple_3.ll
===================================================================
--- test/Isl/Ast/alias_simple_3.ll
+++ test/Isl/Ast/alias_simple_3.ll
@@ -12,11 +12,11 @@
 ;        A[i] = B[i];
 ;    }
 ;
-; NOAA: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; BASI: if (1)
-; TBAA: if (1)
-; SCEV: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
-; GLOB: if (1 && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; NOAA: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; BASI: if (N <= 1024 ? 1 : 0)
+; TBAA: if (N <= 1024 ? 1 : 0)
+; SCEV: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
+; GLOB: if ((N <= 1024 ? 1 : 0) && (&MemRef_A[N] <= &MemRef_B[0] || &MemRef_B[N] <= &MemRef_A[0]))
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
Index: test/ScopInfo/assume_gep_bounds.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/assume_gep_bounds.ll
@@ -0,0 +1,76 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+
+;    void foo(float A[10][20][30], long n, long m, long p) {
+;      for (long i = 0; i < n; i++)
+;        for (long j = 0; j < m; j++)
+;          for (long k = 0; k < p; k++)
+;            A[i][j][k] = i + j + k;
+;    }
+
+; For the above code we want to assume that all memory accesses are within the
+; bounds of the array A. In C (and LLVM-IR) this is not required, such that out
+; of bounds accesses are valid. However, as such accesses are uncommon, cause
+; complicated dependence pattern and as a result make dependence analysis more
+; costly and may prevent or hinder useful program transformations, we assume
+; absence of out-of-bound accesses. To do so we derive the set of parameter
+; values for which our assumption holds.
+
+; CHECK: Assumed Context
+; CHECK-NEXT: [n, m, p] -> {  : p <= 30 and m <= 20 }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo([20 x [30 x float]]* %A, i64 %n, i64 %m, i64 %p) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc13, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc14, %for.inc13 ]
+  %cmp = icmp slt i64 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end15
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc10, %for.body
+  %j.0 = phi i64 [ 0, %for.body ], [ %inc11, %for.inc10 ]
+  %cmp2 = icmp slt i64 %j.0, %m
+  br i1 %cmp2, label %for.body3, label %for.end12
+
+for.body3:                                        ; preds = %for.cond1
+  br label %for.cond4
+
+for.cond4:                                        ; preds = %for.inc, %for.body3
+  %k.0 = phi i64 [ 0, %for.body3 ], [ %inc, %for.inc ]
+  %cmp5 = icmp slt i64 %k.0, %p
+  br i1 %cmp5, label %for.body6, label %for.end
+
+for.body6:                                        ; preds = %for.cond4
+  %add = add nsw i64 %i.0, %j.0
+  %add7 = add nsw i64 %add, %k.0
+  %conv = sitofp i64 %add7 to float
+  %arrayidx9 = getelementptr inbounds [20 x [30 x float]]* %A, i64 %i.0, i64 %j.0, i64 %k.0
+  store float %conv, float* %arrayidx9, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body6
+  %inc = add nsw i64 %k.0, 1
+  br label %for.cond4
+
+for.end:                                          ; preds = %for.cond4
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.end
+  %inc11 = add nsw i64 %j.0, 1
+  br label %for.cond1
+
+for.end12:                                        ; preds = %for.cond1
+  br label %for.inc13
+
+for.inc13:                                        ; preds = %for.end12
+  %inc14 = add nsw i64 %i.0, 1
+  br label %for.cond
+
+for.end15:                                        ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/assume_gep_bounds_2.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/assume_gep_bounds_2.ll
@@ -0,0 +1,94 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+;    void foo(float A[restrict 10][20], float B[restrict 10][20], long n, long m,
+;             long p) {
+;      for (long i = 0; i < n; i++)
+;        for (long j = 0; j < m; j++)
+;          A[i][j] = i + j;
+;      for (long i = 0; i < m; i++)
+;        for (long j = 0; j < p; j++)
+;          B[i][j] = i + j;
+;    }
+
+; This code is within bounds either if m and p are smaller than the array sizes,
+; but also if only p is smaller than the size of the second B dimension and n
+; is such that the first loop is never executed and consequently A is never
+; accessed. In this case the value of m does not matter.
+
+; CHECK: Assumed Context:
+; CHECK-NEXT: [n, m, p] -> { : (n <= 0 and p <= 20) or (m <= 20 and p <= 20) }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo([20 x float]* noalias %A, [20 x float]* noalias %B, i64 %n, i64 %m, i64 %p) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc5, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc6, %for.inc5 ]
+  %cmp = icmp slt i64 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end7
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i64 [ 0, %for.body ], [ %inc, %for.inc ]
+  %cmp2 = icmp slt i64 %j.0, %m
+  br i1 %cmp2, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i64 %i.0, %j.0
+  %conv = sitofp i64 %add to float
+  %arrayidx4 = getelementptr inbounds [20 x float]* %A, i64 %i.0, i64 %j.0
+  store float %conv, float* %arrayidx4, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i64 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc5
+
+for.inc5:                                         ; preds = %for.end
+  %inc6 = add nsw i64 %i.0, 1
+  br label %for.cond
+
+for.end7:                                         ; preds = %for.cond
+  br label %for.cond9
+
+for.cond9:                                        ; preds = %for.inc25, %for.end7
+  %i8.0 = phi i64 [ 0, %for.end7 ], [ %inc26, %for.inc25 ]
+  %cmp10 = icmp slt i64 %i8.0, %m
+  br i1 %cmp10, label %for.body12, label %for.end27
+
+for.body12:                                       ; preds = %for.cond9
+  br label %for.cond14
+
+for.cond14:                                       ; preds = %for.inc22, %for.body12
+  %j13.0 = phi i64 [ 0, %for.body12 ], [ %inc23, %for.inc22 ]
+  %cmp15 = icmp slt i64 %j13.0, %p
+  br i1 %cmp15, label %for.body17, label %for.end24
+
+for.body17:                                       ; preds = %for.cond14
+  %add18 = add nsw i64 %i8.0, %j13.0
+  %conv19 = sitofp i64 %add18 to float
+  %arrayidx21 = getelementptr inbounds [20 x float]* %B, i64 %i8.0, i64 %j13.0
+  store float %conv19, float* %arrayidx21, align 4
+  br label %for.inc22
+
+for.inc22:                                        ; preds = %for.body17
+  %inc23 = add nsw i64 %j13.0, 1
+  br label %for.cond14
+
+for.end24:                                        ; preds = %for.cond14
+  br label %for.inc25
+
+for.inc25:                                        ; preds = %for.end24
+  %inc26 = add nsw i64 %i8.0, 1
+  br label %for.cond9
+
+for.end27:                                        ; preds = %for.cond9
+  ret void
+}