Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -54,12 +54,17 @@ public: SmallVector<const SCEV *, 4> Subscripts, Sizes; + /// @brief The lower/upper bound for non-affine accesses. + const SCEV *LB, *UB; + explicit IRAccess(TypeKind Type, Value *BaseAddress, const SCEV *Offset, unsigned elemBytes, bool Affine, SmallVector<const SCEV *, 4> Subscripts, - SmallVector<const SCEV *, 4> Sizes) + SmallVector<const SCEV *, 4> Sizes, + const SCEV *LB = nullptr, const SCEV *UB = nullptr) : BaseAddress(BaseAddress), Offset(Offset), ElemBytes(elemBytes), - Type(Type), IsAffine(Affine), Subscripts(Subscripts), Sizes(Sizes) {} + Type(Type), IsAffine(Affine), Subscripts(Subscripts), Sizes(Sizes), + LB(LB), UB(UB) {} enum TypeKind getType() const { return Type; } @@ -79,6 +84,12 @@ bool isScalar() const { return Subscripts.size() == 0; } + /// @brief Return the lower bound of this access (for non-affine ones). + const SCEV *getLB() const { return LB; } + + /// @brief Return the upper bound of this access (for non-affine ones). + const SCEV *getUB() const { return UB; } + void print(raw_ostream &OS) const; }; @@ -285,6 +296,15 @@ /// instruction. IRAccess buildIRAccess(Instruction *Inst, Loop *L, Region *R); + /// @brief Build an instance of IRAccess if the call can access the memory. + /// + /// @param CI The call instruction that might access the memory + /// @param L The parent loop of the instruction + /// @param R The region on which we are going to build a TempScop + /// @param Accs The set of accesses in which we accumulate IRAccesses + void buildIRCallAccess(CallInst *CI, Loop *L, Region *R, + AccFuncSetType &Accs); + /// @brief Analyze and extract the cross-BB scalar dependences (or, /// dataflow dependencies) of an instruction. 
/// Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -300,7 +300,7 @@ } bool ScopDetection::isValidCallInst(CallInst &CI) { - if (CI.mayHaveSideEffects() || CI.doesNotReturn()) + if (CI.doesNotReturn()) return false; if (CI.doesNotAccessMemory()) @@ -359,6 +359,10 @@ case llvm::Intrinsic::donothing: case llvm::Intrinsic::assume: case llvm::Intrinsic::expect: + // Memory intrinsics can be modeled, thus are supported. + case llvm::Intrinsic::memmove: + case llvm::Intrinsic::memset: + case llvm::Intrinsic::memcpy: return true; default: // Other intrinsics are not supported. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -71,7 +71,9 @@ /// @param Stmt The location at which the scalar evolution expression /// is evaluated. /// @param Expr The expression that is translated. - static __isl_give isl_pw_aff *getPwAff(ScopStmt *Stmt, const SCEV *Expr); + /// @param NbLS The number of input dimensions if not the same as @p Stmt. 
+ static __isl_give isl_pw_aff *getPwAff(ScopStmt *Stmt, const SCEV *Expr, + int NbLS = -1); private: isl_ctx *Ctx; @@ -101,14 +103,17 @@ : Ctx(Stmt->getIslCtx()), NbLoopSpaces(Stmt->getNumIterators()), S(Stmt->getParent()) {} -__isl_give isl_pw_aff *SCEVAffinator::getPwAff(ScopStmt *Stmt, - const SCEV *Scev) { +__isl_give isl_pw_aff *SCEVAffinator::getPwAff(ScopStmt *Stmt, const SCEV *Scev, + int NbLS) { Scop *S = Stmt->getParent(); const Region *Reg = &S->getRegion(); S->addParams(getParamsInAffineExpr(Reg, Scev, *S->getSE())); SCEVAffinator Affinator(Stmt); + if (NbLS != -1) + Affinator.NbLoopSpaces = NbLS; + return Affinator.visit(Scev); } @@ -436,14 +441,48 @@ BaseName = getIslCompatibleName("MemRef_", getBaseAddr(), ""); isl_id *BaseAddrId = isl_id_alloc(Ctx, getBaseName().c_str(), nullptr); + // Non-affine accesses are either real memory accesses with access functions + // we cannot analyze or special instructions (e.g., memset intrinsics) which + // write and/or read memory. The former will be overapproximated (e.g., we + // model the access to the whole array) while the latter comes with bounds. + // Since the overapproximated accesses might not write all array cells they + // are marked as MAY_WRITE. if (!Access.isAffine()) { - // We overapproximate non-affine accesses with a possible access to the - // whole array. For read accesses it does not make a difference, if an - // access must or may happen. However, for write accesses it is important to - // differentiate between writes that must happen and writes that may happen. 
+ isl_set *LBSet, *UBSet, *Range; + isl_local_space *LocalSpace; + isl_space *Space; + isl_pw_aff *IV; + isl_aff *Zero; + AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement)); - AccessRelation = - isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId); + AccessRelation = isl_map_set_tuple_id(AccessRelation, isl_dim_out, + isl_id_copy(BaseAddrId)); + + Range = isl_map_range(isl_map_copy(AccessRelation)); + Space = isl_set_get_space(Range); + LocalSpace = isl_local_space_from_space( + isl_space_reset_tuple_id(Space, isl_dim_set)); + Zero = isl_aff_zero_on_domain(LocalSpace); + IV = + isl_pw_aff_from_aff(isl_aff_set_coefficient_si(Zero, isl_dim_in, 0, 1)); + + if (const SCEV *LB = Access.getLB()) { + LBSet = isl_pw_aff_ge_set(isl_pw_aff_copy(IV), + SCEVAffinator::getPwAff(Statement, LB, 1)); + LBSet = isl_set_set_tuple_id(LBSet, isl_id_copy(BaseAddrId)); + AccessRelation = isl_map_intersect_range(AccessRelation, LBSet); + } + + if (const SCEV *UB = Access.getUB()) { + UBSet = isl_pw_aff_le_set(isl_pw_aff_copy(IV), + SCEVAffinator::getPwAff(Statement, UB, 1)); + UBSet = isl_set_set_tuple_id(UBSet, isl_id_copy(BaseAddrId)); + AccessRelation = isl_map_intersect_range(AccessRelation, UBSet); + } + + isl_id_free(BaseAddrId); + isl_set_free(Range); + isl_pw_aff_free(IV); return; } Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -188,6 +189,77 @@ Subscripts, Sizes); } +void TempScopInfo::buildIRCallAccess(CallInst *CI, Loop *L, Region *R, + AccFuncSetType &Accs) { + // Trivial calls can be ignored. 
+ if (CI->doesNotAccessMemory()) + return; + + SmallVector<const SCEV *, 4> Subscripts, Sizes; + + const SCEVUnknown *WriteBasePtr = nullptr, *ReadBasePtr = nullptr; + const SCEV *Len = nullptr, *WriteLen = nullptr, *ReadLen = nullptr; + const SCEV *WriteSize = nullptr, *ReadSize = nullptr; + const SCEV *WriteAF = nullptr, *ReadAF = nullptr; + const SCEV *WriteUB = nullptr, *ReadUB = nullptr; + unsigned RSize, WSize; + + // Handle memory intrinsics with well defined read and write effects. + if (auto *IT = dyn_cast<IntrinsicInst>(CI)) { + switch (IT->getIntrinsicID()) { + case llvm::Intrinsic::memmove: + case llvm::Intrinsic::memcpy: + ReadAF = SE->getSCEVAtScope(IT->getOperand(1), L); + ReadBasePtr = dyn_cast<SCEVUnknown>(SE->getPointerBase(ReadAF)); + assert(ReadBasePtr && "Could not find base pointer"); + ReadAF = SE->getMinusSCEV(ReadAF, ReadBasePtr); + RSize = + TD->getTypeStoreSize(ReadBasePtr->getType()->getPointerElementType()); + ReadSize = SE->getConstant(ZeroOffset->getType(), RSize); + + // Fall through + case llvm::Intrinsic::memset: + WriteAF = SE->getSCEVAtScope(IT->getOperand(0), L); + WriteBasePtr = dyn_cast<SCEVUnknown>(SE->getPointerBase(WriteAF)); + assert(WriteBasePtr && "Could not find base pointer"); + WriteAF = SE->getMinusSCEV(WriteAF, WriteBasePtr); + + WSize = TD->getTypeStoreSize( + WriteBasePtr->getType()->getPointerElementType()); + WriteSize = SE->getConstant(ZeroOffset->getType(), WSize); + + Len = SE->getSCEVAtScope(IT->getOperand(2), L); + WriteLen = + SE->getAddExpr(Len, SE->getConstant(Len->getType(), WSize - 1)); + WriteLen = SE->getUDivExpr(WriteLen, WriteSize); + WriteUB = SE->getAddExpr(WriteLen, WriteAF); + + Accs.push_back(std::make_pair( + IRAccess(IRAccess::MUST_WRITE, WriteBasePtr->getValue(), WriteAF, + WSize, false, Subscripts, Sizes, WriteAF, WriteUB), + CI)); + + if (ReadBasePtr) { + ReadLen = + SE->getAddExpr(Len, SE->getConstant(Len->getType(), RSize - 1)); + ReadLen = SE->getUDivExpr(ReadLen, ReadSize); + ReadUB = SE->getAddExpr(ReadLen, ReadAF); + + 
Accs.push_back(std::make_pair( + IRAccess(IRAccess::READ, ReadBasePtr->getValue(), ReadAF, RSize, + false, Subscripts, Sizes, ReadAF, ReadUB), + CI)); + } + + return; + default: + break; + } + } + + llvm_unreachable("Unknown call instruction, cannot model the effect!"); +} + void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); @@ -197,6 +269,11 @@ if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) Functions.push_back(std::make_pair(buildIRAccess(Inst, L, &R), Inst)); + // Some call accesses and intrinsics might access the memory. If so we + // build overapproximated IRAccesses for them. + if (CallInst *CI = dyn_cast<CallInst>(Inst)) + buildIRCallAccess(CI, L, &R, Functions); + if (!isa<StoreInst>(Inst) && buildScalarDependences(Inst, &R)) { // If the Instruction is used outside the statement, we need to build the // write access. Index: test/ScopInfo/memcpy.ll =================================================================== --- /dev/null +++ test/ScopInfo/memcpy.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Stmt_for_body3 +; CHECK: Domain := +; CHECK: { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body3[i0, i1] -> MemRef_A[o0] : o0 >= -16 and o0 <= -6 }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body3[i0, i1] -> MemRef_B[o0] : o0 >= 64 and o0 <= 69 }; +; +; #include <string.h> +; +; void jd(int *restrict A, long *restrict B) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; memcpy(A - 4, B + 8, 37); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* noalias %A, i64* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc5, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 
] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end7 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add.ptr = getelementptr inbounds i32* %A, i64 -4 + %tmp = bitcast i32* %add.ptr to i8* + %add.ptr4 = getelementptr inbounds i64* %B, i64 8 + %tmp2 = bitcast i64* %add.ptr4 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp2, i64 37, i32 4, i1 false) + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc5 + +for.inc5: ; preds = %for.end + %inc6 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end7: ; preds = %for.cond + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + Index: test/ScopInfo/memmove.ll =================================================================== --- /dev/null +++ test/ScopInfo/memmove.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Stmt_for_body3 +; CHECK: Domain := +; CHECK: { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body3[i0, i1] -> MemRef_A[o0] : o0 >= -16 and o0 <= -8 }; +; CHECK: ReadAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body3[i0, i1] -> MemRef_B[o0] : o0 >= 64 and o0 <= 68 }; +; +; #include <string.h> +; +; void jd(int *restrict A, long *restrict B) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; memmove(A - 4, B + 8, 32); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void 
@jd(i32* noalias %A, i64* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc5, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc6, %for.inc5 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end7 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %add.ptr = getelementptr inbounds i32* %A, i64 -4 + %tmp = bitcast i32* %add.ptr to i8* + %add.ptr4 = getelementptr inbounds i64* %B, i64 8 + %tmp2 = bitcast i64* %add.ptr4 to i8* + call void @llvm.memmove.p0i8.p0i8.i64(i8* %tmp, i8* %tmp2, i64 32, i32 4, i1 false) + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc5 + +for.inc5: ; preds = %for.end + %inc6 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end7: ; preds = %for.cond + ret void +} + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 + Index: test/ScopInfo/memset.ll =================================================================== --- /dev/null +++ test/ScopInfo/memset.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Stmt_for_body3 +; CHECK: Domain := +; CHECK: { Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023 }; +; CHECK: Scattering := +; CHECK: { Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] +; CHECK: { Stmt_for_body3[i0, i1] -> MemRef_A[o0] : o0 >= 0 and o0 <= 47 }; +; +; #include <string.h> +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 1024; j++) +; memset(A, '$', 187); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* 
noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc4, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end6 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1024 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %tmp = bitcast i32* %A to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp, i8 36, i64 187, i32 4, i1 false) + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %j.0, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc4 + +for.inc4: ; preds = %for.end + %inc5 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end6: ; preds = %for.cond + ret void +} + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #1 +