diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1701,6 +1701,10 @@ return std::unique(adl_begin(R), adl_end(R), P); } +template <typename Range> auto unique(Range &&R) { + return std::unique(adl_begin(R), adl_end(R)); +} + /// Wrapper function around std::equal to detect if pair-wise elements between /// two ranges are the same. template <typename L, typename R> bool equal(L &&LRange, R &&RRange) { diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -24,6 +24,7 @@ class GetElementPtrInst; class ScalarEvolution; class SCEV; +class Loop; /// Compute the array dimensions Sizes from the set of Terms extracted from /// the memory access function of this SCEVAddRecExpr (second step of @@ -42,7 +43,8 @@ /// (third step of delinearization). void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes); + ArrayRef<const SCEV *> Sizes); + /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the /// subscripts and sizes of an array access. /// @@ -68,7 +70,7 @@ /// /// Example: /// -/// A[][n][m] +/// float A[][n][m] /// /// for i /// for j @@ -78,6 +80,8 @@ /// The initial SCEV: /// /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] +/// (Note: The SCEV of a pointer is in bytes, here we use the argument +/// of the GetElementPtr for illustration) /// /// 1. Find the different terms in the step functions: /// -> [2*m, 5, n*m, n*m] @@ -106,11 +110,17 @@ /// /// The subscript of the outermost dimension is the Quotient: [j+k]. /// -/// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. 
+/// Overall, for the address of the above memory access we have: A[][n][m][4], +/// and the access function: A[j+k][2i][5i][0] where [4] is the array element +/// size in bytes and [0] is the byte offset into this element. void delinearize(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize); +bool delinearizeAccess(ScalarEvolution &SE, Value *PtrVal, Type *AccessTy, + Loop *Scope, SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes); + /// Gathers the individual index expressions from a GEP instruction. /// /// This function optimistically assumes the GEP references into a fixed size diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -344,7 +344,7 @@ void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes) { + ArrayRef<const SCEV *> Sizes) { // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty()) return; @@ -359,6 +359,13 @@ const SCEV *Q, *R; SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); + // if (i == Last && !isa<SCEVConstant>(R)) { + // R = Res; + // Q = SE.getZero(R->getType()); + // Q = SE.getUDivExpr(Res, Sizes[i]); + // R = SE.getURemExpr(Res, Sizes[i]); + // } + + LLVM_DEBUG({ dbgs() << "Res: " << *Res << "\n"; dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; @@ -369,19 +376,6 @@ Res = Q; - // Do not record the last subscript corresponding to the size of elements in - // the array. - if (i == Last) { - - // Bail out if the byte offset is non-zero. - if (!R->isZero()) { - Subscripts.clear(); - Sizes.clear(); - return; - } - - continue; - } // Record the access function for the current subscript. 
Subscripts.push_back(R); @@ -431,8 +425,7 @@ /// and then SCEV->delinearize determines the size of some of the dimensions of /// the array as these are the multiples by which the strides are happening: /// -/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) -/// bytes. +/// CHECK: ArrayDecl[UnknownSize][%m][%o][sizeof(double)] /// /// Note that the outermost dimension remains of UnknownSize because there are /// no strides that would help identifying the size of the last dimension: when @@ -443,7 +436,7 @@ /// Finally delinearize provides the access functions for the array reference /// that does correspond to A[i][j][k] of the above C testcase: /// -/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] /// /// The testcases are checking the output of a function pass: /// DelinearizationPass that walks through all loads and stores of a function @@ -485,6 +478,21 @@ }); } +class SCEVEliminateParams : public SCEVRewriteVisitor<SCEVEliminateParams> { +public: + static const SCEV *rewrite(ScalarEvolution &SE, const SCEV *E) { + SCEVEliminateParams Rewriter(SE); + return Rewriter.visit(E); + } + + SCEVEliminateParams(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVEliminateParams>(SE) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + auto Ty = Expr->getType(); + return SE.getZero(Ty); + } +}; + bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE, const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts, @@ -523,6 +531,136 @@ return !Subscripts.empty(); } +bool llvm::delinearizeAccess(ScalarEvolution &SE, Value *PtrVal, Type *AccessTy, + Loop *Scope, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes) { + + const SCEV *PtrExpr = SE.getSCEVAtScope(PtrVal, Scope); + + const SCEVUnknown *BasePointer = + dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrExpr)); + // Do not delinearize if we cannot find the base pointer. 
+ if (!BasePointer) + return false; + + const SCEV *AccessFn = SE.getMinusSCEV(PtrExpr, BasePointer); + + + + Type *PtrItTy = SE.getEffectiveSCEVType(PtrVal->getType()); + const SCEV *AccessSize = SE.getSizeOfExpr(PtrItTy, AccessTy); + + + SmallVector<APInt> CandidateElementSize; + + // 1. For non-structure-of-arrays, the access size is the element size. + if (const SCEVConstant *C = dyn_cast_or_null<SCEVConstant>(AccessSize)) + CandidateElementSize.push_back(C->getAPInt()); + + + // 2. Try to infer the element size from the GEP. + SmallVector<GEPOperator *> GEPChain; + Value *GEPStart = PtrVal; + while (GEPOperator *GEP = dyn_cast<GEPOperator>(GEPStart)) { + GEPChain.push_back(GEP); + GEPStart = GEP->getPointerOperand(); + } + std::reverse(GEPChain.begin(), GEPChain.end()); + + auto AddCandidateStructTy = [&](Type *Ty) { + const SCEV* SizeExpr = SE.getSizeOfExpr(PtrItTy, Ty); + if (!SizeExpr || SizeExpr->isZero()) + return; + if (const SCEVConstant *C = dyn_cast_or_null<SCEVConstant>(SizeExpr)) + CandidateElementSize.push_back(C->getAPInt()); + }; + + for (GEPOperator *GEP : GEPChain) { + Type *SrcType = GEP->getSourceElementType(); + AddCandidateStructTy(SrcType); + // Value* PtrIndex = *GEP->indices().begin(); + SmallVector<Value *> Indices; + llvm::append_range(Indices, llvm::drop_begin(GEP->indices(), 1)); + for (auto Idx : Indices) { + SrcType = GetElementPtrInst::getTypeAtIndex(SrcType, Idx); + AddCandidateStructTy(SrcType); + } + GEPStart = GEP; + } + assert(GEPStart == PtrVal); + + // 3. Infer the array element size from the SCEV stride. + SCEVEliminateParams Eliminator(SE); + SmallVector<const SCEV *> Strides; + SCEVCollectStrides StrideCollector(SE, Strides); + visitAll(AccessFn, StrideCollector); + for (const SCEV *Stride : StrideCollector.Strides) { + auto ReducedStride = Eliminator.visit(Stride); + if (const SCEVConstant *C = + dyn_cast_or_null<SCEVConstant>(ReducedStride)) { + CandidateElementSize.push_back(C->getAPInt()); + } + } + + + // Use largest ElementSize that does give a benefit, fall back to AccessSize + // if none does. 
+ + const SCEV *ElementSize = AccessSize; + llvm::sort(CandidateElementSize, + [](const APInt &LHS, const APInt &RHS) { return LHS.ult(RHS); }); + CandidateElementSize.erase(llvm::unique(CandidateElementSize), + CandidateElementSize.end()); + for (auto S : llvm::reverse(CandidateElementSize)) { + if (S.isZero()) + continue; + + const SCEV *Q, *R; + SCEVDivision::divide(SE, AccessFn, SE.getConstant(S), &Q, &R); + + // Only consider element sizes where the subscript will be constant. This must be the case for structure accesses. + if (isa<SCEVConstant>(R)) { + ElementSize = SE.getConstant(S); + break; + } + } + + + // First step: collect parametric terms. + SmallVector<const SCEV *> Terms; + collectParametricTerms(SE, AccessFn, Terms); + + if (Terms.empty()) + return false; + + + // Second step: find subscript sizes. + findArrayDimensions(SE, Terms, Sizes, ElementSize); + + if (Sizes.empty()) + return false; + + // Third step: compute the access functions for each subscript. + computeAccessFunctions(SE, AccessFn, Subscripts, Sizes); + + if (Subscripts.empty()) + return false; + + LLVM_DEBUG({ + dbgs() << "succeeded to delinearize " << *AccessFn << "\n"; + dbgs() << "ArrayDecl[UnknownSize]"; + for (const SCEV *S : Sizes) + dbgs() << "[" << *S << "]"; + + dbgs() << "\nArrayRef"; + for (const SCEV *S : Subscripts) + dbgs() << "[" << *S << "]"; + dbgs() << "\n"; + }); + return true; +} + namespace { class Delinearization : public FunctionPass { @@ -548,8 +686,7 @@ O << "Delinearization on function " << F->getName() << ":\n"; for (Instruction &Inst : instructions(F)) { // Only analyze loads and stores. 
- if (!isa<StoreInst>(&Inst) && !isa<LoadInst>(&Inst) && - !isa<GetElementPtrInst>(&Inst)) + if (!isa<StoreInst>(&Inst) && !isa<LoadInst>(&Inst)) continue; const BasicBlock *BB = Inst.getParent(); @@ -558,6 +695,9 @@ for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(&Inst), L); + auto PtrVal = getPointerOperand(&Inst); + auto AccessTy = getLoadStoreType(&Inst); + const SCEVUnknown *BasePointer = dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn)); // Do not delinearize if we cannot find the base pointer. @@ -571,23 +711,22 @@ O << "AccessFunction: " << *AccessFn << "\n"; SmallVector<const SCEV *, 3> Subscripts, Sizes; - delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst)); + delinearizeAccess(*SE, PtrVal, AccessTy, L, Subscripts, Sizes); if (Subscripts.size() == 0 || Sizes.size() == 0 || - Subscripts.size() != Sizes.size()) { + Subscripts.size() != Sizes.size() + 1) { O << "failed to delinearize\n"; continue; } O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; - int Size = Subscripts.size(); - for (int i = 0; i < Size - 1; i++) - O << "[" << *Sizes[i] << "]"; - O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + for (const SCEV *Size : Sizes) + O << "[" << *Size << "]"; + O << "\n"; O << "ArrayRef"; - for (int i = 0; i < Size; i++) - O << "[" << *Subscripts[i] << "]"; + for (const SCEV *Subscript : Subscripts) + O << "[" << *Subscript << "]"; O << "\n"; } } diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3451,6 +3451,22 @@ computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); + // computeAccessFunctions also returns the byte offset into the array element, + // which DependenceAnalysis does not handle. Pop that last subscript and bail + // out if it is not zero. 
+ // TODO: Support memory accesses that fall into the array element's byte + // range. + if (!SrcSubscripts.empty()) { + const SCEV *EltOffset = SrcSubscripts.pop_back_val(); + if (!EltOffset->isZero()) + return false; + } + if (!DstSubscripts.empty()) { + const SCEV *EltOffset = DstSubscripts.pop_back_val(); + if (!EltOffset->isZero()) + return false; + } + // Fail when there is only a subscript: that's a linearized access function. if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || SrcSubscripts.size() != DstSubscripts.size()) diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -348,15 +348,21 @@ llvm::delinearize(SE, AccessFn, Subscripts, Sizes, SE.getElementSize(&StoreOrLoadInst)); + // Eliminate the innermost subscript (for the byte offset to the array + // element). LoopCacheAnalysis assumes it is zero. + if (!Subscripts.empty() && Subscripts.back()->isZero()) + Subscripts.pop_back(); + if (Subscripts.empty() || Sizes.empty() || Subscripts.size() != Sizes.size()) { + Subscripts.clear(); + Sizes.clear(); + // Attempt to determine whether we have a single dimensional array access. // before giving up. if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { LLVM_DEBUG(dbgs().indent(2) << "ERROR: failed to delinearize reference\n"); - Subscripts.clear(); - Sizes.clear(); return false; } diff --git a/llvm/test/Analysis/Delinearization/a.ll b/llvm/test/Analysis/Delinearization/a.ll --- a/llvm/test/Analysis/Delinearization/a.ll +++ b/llvm/test/Analysis/Delinearization/a.ll @@ -9,8 +9,8 @@ ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes. 
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][4] +; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { entry: diff --git a/llvm/test/Analysis/Delinearization/array_of_struct.ll b/llvm/test/Analysis/Delinearization/array_of_struct.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/Delinearization/array_of_struct.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; CHECK: Inst: store i64 0, i64* %y, align 1, !tbaa !3 +; CHECK: In Loop with Header: for.cond +; CHECK: AccessFunction: {4,+,(12 + (12 * %N))}<%for.cond> +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%N][12] +; CHECK: ArrayRef[{0,+,1}<%for.cond>][{0,+,1}<%for.cond>][4] + +; struct __attribute__((packed)) Pair { int x; long y; }; +; void foo(unsigned long N, struct Pair A[][N]) { +; for (unsigned long i = 0; i < N; i+=1) +; A[i][i].y = 0; +; } + + + +source_filename = "byte_offset.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.Pair = type <{ i32, i64 }> + +; Function Attrs: nounwind uwtable +define dso_local void @foo(i64 %N, %struct.Pair* %A) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ] + %cmp = icmp ult i64 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %0 = mul nsw i64 %i.0, %N + %arrayidx = getelementptr inbounds %struct.Pair, %struct.Pair* %A, i64 %0 + %arrayidx1 = getelementptr inbounds %struct.Pair, %struct.Pair* %arrayidx, i64 %i.0 + %y = getelementptr inbounds %struct.Pair, %struct.Pair* %arrayidx1, i64 0, i32 1 + store i64 0, i64* %y, align 1, !tbaa !3 + %add = add i64 %i.0, 1 + br label 
%for.cond, !llvm.loop !9 + +for.end: ; preds = %for.cond + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} +!2 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 164631ce80ecbcc0af069cb6ddd29fef726f222a)"} +!3 = !{!4, !8, i64 4} +!4 = !{!"Pair", !5, i64 0, !8, i64 4} +!5 = !{!"int", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!"long", !6, i64 0} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.mustprogress"} \ No newline at end of file diff --git a/llvm/test/Analysis/Delinearization/array_of_struct3.ll b/llvm/test/Analysis/Delinearization/array_of_struct3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/Delinearization/array_of_struct3.ll @@ -0,0 +1,125 @@ +; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; CHECK: Inst: %3 = load double, double* %b, align 1, !tbaa !3 +; CHECK-NEXT: In Loop with Header: for.cond6 +; CHECK-NEXT: AccessFunction: {{\{\{\{}}(4 + (-13 * %n * %m)),+,(13 * %n * %m)}<%for.cond>,+,(13 * %m)}<%for.cond1>,+,13}<%for.cond6> +; CHECK-NEXT: Base offset: %f1 +; CHECK-NEXT: ArrayDecl[UnknownSize][%n][%m][13] +; CHECK-NEXT: ArrayRef[{-1,+,1}<%for.cond>][{0,+,1}<%for.cond1>][{0,+,1}<%for.cond6>][4] + +; CHECK: AccessFunction: 
{{\{\{\{}}4,+,(13 * %n * %m)}<%for.cond>,+,(13 * %m)}<%for.cond1>,+,13}<%for.cond6> +; CHECK-NEXT: Base offset: %f1 +; CHECK-NEXT: ArrayDecl[UnknownSize][%n][%m][13] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.cond>][{0,+,1}<%for.cond1>][{0,+,1}<%for.cond6>][4] + +; struct __attribute__((packed)) MyS { +; float a; +; double b; +; char c; +; }; +; void foo(long long n, long long m, struct MyS f1[][n][m]) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < n; j++) +; for (int k = 0; k < m; k++) +; f1[i][j][k].b = f1[i-1][j][k].b; +; } + +source_filename = "array_of_struct2.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.MyS = type <{ float, double, i8 }> + +; Function Attrs: nounwind uwtable +define dso_local void @foo(i64 %n, i64 %m, %struct.MyS* %f1) #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc26, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc27, %for.inc26 ] + %cmp = icmp ult i32 %i.0, 1024 + br i1 %cmp, label %for.cond1.preheader, label %for.end28 + +for.cond1.preheader: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.cond1.preheader, %for.inc23 + %j.0 = phi i32 [ %inc24, %for.inc23 ], [ 0, %for.cond1.preheader ] + %conv = zext i32 %j.0 to i64 + %cmp2 = icmp slt i64 %conv, %n + br i1 %cmp2, label %for.cond6.preheader, label %for.inc26 + +for.cond6.preheader: ; preds = %for.cond1 + br label %for.cond6 + +for.cond6: ; preds = %for.cond6.preheader, %for.body11 + %k.0 = phi i32 [ %inc, %for.body11 ], [ 0, %for.cond6.preheader ] + %conv7 = zext i32 %k.0 to i64 + %cmp8 = icmp slt i64 %conv7, %m + br i1 %cmp8, label %for.body11, label %for.inc23 + +for.body11: ; preds = %for.cond6 + %sub = add nsw i32 %i.0, -1 + %idxprom = sext i32 %sub to i64 + %0 = mul nuw i64 %n, %m + %1 = mul nsw i64 %0, %idxprom + %arrayidx = getelementptr inbounds %struct.MyS, %struct.MyS* %f1, i64 %1 + %idxprom12 = zext i32 %j.0 to i64 + %2 = mul nsw 
i64 %idxprom12, %m + %arrayidx13 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx, i64 %2 + %idxprom14 = zext i32 %k.0 to i64 + %arrayidx15 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx13, i64 %idxprom14 + %b = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx15, i64 0, i32 1 + %3 = load double, double* %b, align 1, !tbaa !3 + %idxprom16 = zext i32 %i.0 to i64 + %4 = mul nuw i64 %n, %m + %5 = mul nsw i64 %4, %idxprom16 + %arrayidx17 = getelementptr inbounds %struct.MyS, %struct.MyS* %f1, i64 %5 + %idxprom18 = zext i32 %j.0 to i64 + %6 = mul nsw i64 %idxprom18, %m + %arrayidx19 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx17, i64 %6 + %idxprom20 = zext i32 %k.0 to i64 + %arrayidx21 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx19, i64 %idxprom20 + %b22 = getelementptr inbounds %struct.MyS, %struct.MyS* %arrayidx21, i64 0, i32 1 + store double %3, double* %b22, align 1, !tbaa !3 + %inc = add nuw nsw i32 %k.0, 1 + br label %for.cond6, !llvm.loop !9 + +for.inc23: ; preds = %for.cond6 + %inc24 = add nuw nsw i32 %j.0, 1 + br label %for.cond1, !llvm.loop !11 + +for.inc26: ; preds = %for.cond1 + %inc27 = add nuw nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !12 + +for.end28: ; preds = %for.cond + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} 
+!2 = !{!"clang version 14.0.0 (/home/meinersbur/src/llvm-project/clang 8f792707c4e5208bba08d837504c93de28b1f209)"} +!3 = !{!4, !8, i64 4} +!4 = !{!"MyS", !5, i64 0, !8, i64 4, !6, i64 12} +!5 = !{!"float", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!"double", !6, i64 0} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.mustprogress"} +!11 = distinct !{!11, !10} +!12 = distinct !{!12, !10} diff --git a/llvm/test/Analysis/Delinearization/byte_offset.ll b/llvm/test/Analysis/Delinearization/byte_offset.ll --- a/llvm/test/Analysis/Delinearization/byte_offset.ll +++ b/llvm/test/Analysis/Delinearization/byte_offset.ll @@ -1,8 +1,12 @@ ; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s ; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s +; CHECK: Inst: store float 0.000000e+00, float* %ptr, align 4 +; CHECK: In Loop with Header: inner.loop ; CHECK: AccessFunction: ({0,+,%i2}<%outer.loop> + %unknown) -; CHECK-NEXT: failed to delinearize +; CHECK: Base offset: %A +; CHECK: ArrayDecl[UnknownSize][%i2][4] +; CHECK: ArrayRef[0][0][({0,+,%i2}<%outer.loop> + %unknown)] ; void foo(char A[], long i2, bool c) { ; for (long i = 0; ; ++i) { diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll --- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll +++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -6,15 +6,15 @@ ; CHECK-NEXT: In Loop with Header: for.inc ; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<%for.inc> ; CHECK-NEXT: Base offset: %A -; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. 
-; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>] +; CHECK-NEXT: ArrayDecl[UnknownSize][%N][4] +; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>][0] ; CHECK: Inst: %tmp5 = load float, float* %arrayidx4, align 4 ; CHECK-NEXT: In Loop with Header: for.inc ; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc> ; CHECK-NEXT: Base offset: %B -; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. -; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1] +; CHECK-NEXT: ArrayDecl[UnknownSize][%N][4] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1][0] ; Function Attrs: noinline nounwind uwtable define void @mat_mul(float* %C, float* %A, float* %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { diff --git a/llvm/test/Analysis/Delinearization/divide_by_one.ll b/llvm/test/Analysis/Delinearization/divide_by_one.ll --- a/llvm/test/Analysis/Delinearization/divide_by_one.ll +++ b/llvm/test/Analysis/Delinearization/divide_by_one.ll @@ -13,14 +13,14 @@ ; AddRec: {{(-1 + ((1 + %bs) * %stride)),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<%for.body3> ; CHECK: Inst: %0 = load i8, i8* %arrayidx, align 1 ; CHECK: Base offset: %dst -; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes. -; CHECK: ArrayRef[{(1 + %bs),+,-1}<%for.cond1.preheader>][{-1,+,1}<%for.body3>] +; CHECK: ArrayDecl[UnknownSize][%stride][1] +; CHECK: ArrayRef[{(1 + %bs),+,-1}<%for.cond1.preheader>][{-1,+,1}<%for.body3>][0] ; AddRec: {{(%stride * %bs),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<%for.body3> ; CHECK: Inst: store i8 %0, i8* %arrayidx7, align 1 ; CHECK: Base offset: %dst -; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes. 
-; CHECK: ArrayRef[{%bs,+,-1}<%for.cond1.preheader>][{0,+,1}<%for.body3>] +; CHECK: ArrayDecl[UnknownSize][%stride][1] +; CHECK: ArrayRef[{%bs,+,-1}<%for.cond1.preheader>][{0,+,1}<%for.body3>][0] define void @test(i8* nocapture %dst, i32 %stride, i32 %bs) { entry: diff --git a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll b/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll deleted file mode 100644 --- a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll +++ /dev/null @@ -1,153 +0,0 @@ -; RUN: opt < %s -aa-pipeline=basic-aa -passes='require,print' -disable-output -; -; a, b, c, d, g, h; -; char *f; -; static fn1(p1) { -; char *e = p1; -; for (; d;) { -; a = 0; -; for (;; ++a) -; for (; b; ++b) -; c = e[b + a]; -; } -; } -; -; fn2() { -; for (;;) -; fn1(&f[g * h]); -; } - -@g = common global i32 0, align 4 -@h = common global i32 0, align 4 -@f = common global i8* null, align 4 -@a = common global i32 0, align 4 -@b = common global i32 0, align 4 -@c = common global i32 0, align 4 -@d = common global i32 0, align 4 - -define i32 @fn2() { -entry: - %.pr = load i32, i32* @d, align 4 - %phitmp = icmp eq i32 %.pr, 0 - br label %for.cond - -for.cond: - %0 = phi i1 [ true, %for.cond ], [ %phitmp, %entry ] - br i1 %0, label %for.cond, label %for.cond2thread-pre-split.preheader.i - -for.cond2thread-pre-split.preheader.i: - %1 = load i32, i32* @g, align 4 - %2 = load i32, i32* @h, align 4 - %mul = mul nsw i32 %2, %1 - %3 = load i8*, i8** @f, align 4 - %.pr.pre.i = load i32, i32* @b, align 4 - br label %for.cond2thread-pre-split.i - -for.cond2thread-pre-split.i: - %.pr.i = phi i32 [ 0, %for.inc5.i ], [ %.pr.pre.i, %for.cond2thread-pre-split.preheader.i ] - %storemerge.i = phi i32 [ %inc6.i, %for.inc5.i ], [ 0, %for.cond2thread-pre-split.preheader.i ] - store i32 %storemerge.i, i32* @a, align 4 - %tobool31.i = icmp eq i32 %.pr.i, 0 - br i1 %tobool31.i, label %for.inc5.i, label %for.body4.preheader.i - -for.body4.preheader.i: - %4 = icmp slt i32 %.pr.i, -7 - 
%add.i = add i32 %storemerge.i, %mul - br i1 %4, label %for.body4.i.preheader, label %for.body4.ur.i.preheader - -for.body4.i.preheader: - %5 = sub i32 -8, %.pr.i - %6 = lshr i32 %5, 3 - %7 = mul i32 %6, 8 - br label %for.body4.i - -for.body4.i: - %8 = phi i32 [ %inc.7.i, %for.body4.i ], [ %.pr.i, %for.body4.i.preheader ] - %arrayidx.sum1 = add i32 %add.i, %8 - %arrayidx.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum1 - %9 = load i8, i8* %arrayidx.i, align 1 - %conv.i = sext i8 %9 to i32 - store i32 %conv.i, i32* @c, align 4 - %inc.i = add nsw i32 %8, 1 - store i32 %inc.i, i32* @b, align 4 - %arrayidx.sum2 = add i32 %add.i, %inc.i - %arrayidx.1.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum2 - %10 = load i8, i8* %arrayidx.1.i, align 1 - %conv.1.i = sext i8 %10 to i32 - store i32 %conv.1.i, i32* @c, align 4 - %inc.1.i = add nsw i32 %8, 2 - store i32 %inc.1.i, i32* @b, align 4 - %arrayidx.sum3 = add i32 %add.i, %inc.1.i - %arrayidx.2.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum3 - %11 = load i8, i8* %arrayidx.2.i, align 1 - %conv.2.i = sext i8 %11 to i32 - store i32 %conv.2.i, i32* @c, align 4 - %inc.2.i = add nsw i32 %8, 3 - store i32 %inc.2.i, i32* @b, align 4 - %arrayidx.sum4 = add i32 %add.i, %inc.2.i - %arrayidx.3.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum4 - %12 = load i8, i8* %arrayidx.3.i, align 1 - %conv.3.i = sext i8 %12 to i32 - store i32 %conv.3.i, i32* @c, align 4 - %inc.3.i = add nsw i32 %8, 4 - store i32 %inc.3.i, i32* @b, align 4 - %arrayidx.sum5 = add i32 %add.i, %inc.3.i - %arrayidx.4.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum5 - %13 = load i8, i8* %arrayidx.4.i, align 1 - %conv.4.i = sext i8 %13 to i32 - store i32 %conv.4.i, i32* @c, align 4 - %inc.4.i = add nsw i32 %8, 5 - store i32 %inc.4.i, i32* @b, align 4 - %arrayidx.sum6 = add i32 %add.i, %inc.4.i - %arrayidx.5.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum6 - %14 = load i8, i8* %arrayidx.5.i, align 1 - %conv.5.i = sext i8 
%14 to i32 - store i32 %conv.5.i, i32* @c, align 4 - %inc.5.i = add nsw i32 %8, 6 - store i32 %inc.5.i, i32* @b, align 4 - %arrayidx.sum7 = add i32 %add.i, %inc.5.i - %arrayidx.6.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum7 - %15 = load i8, i8* %arrayidx.6.i, align 1 - %conv.6.i = sext i8 %15 to i32 - store i32 %conv.6.i, i32* @c, align 4 - %inc.6.i = add nsw i32 %8, 7 - store i32 %inc.6.i, i32* @b, align 4 - %arrayidx.sum8 = add i32 %add.i, %inc.6.i - %arrayidx.7.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum8 - %16 = load i8, i8* %arrayidx.7.i, align 1 - %conv.7.i = sext i8 %16 to i32 - store i32 %conv.7.i, i32* @c, align 4 - %inc.7.i = add nsw i32 %8, 8 - store i32 %inc.7.i, i32* @b, align 4 - %tobool3.7.i = icmp sgt i32 %inc.7.i, -8 - br i1 %tobool3.7.i, label %for.inc5.loopexit.ur-lcssa.i, label %for.body4.i - -for.inc5.loopexit.ur-lcssa.i: - %17 = add i32 %.pr.i, 8 - %18 = add i32 %17, %7 - %19 = icmp eq i32 %18, 0 - br i1 %19, label %for.inc5.i, label %for.body4.ur.i.preheader - -for.body4.ur.i.preheader: - %.ph = phi i32 [ %18, %for.inc5.loopexit.ur-lcssa.i ], [ %.pr.i, %for.body4.preheader.i ] - br label %for.body4.ur.i - -for.body4.ur.i: - %20 = phi i32 [ %inc.ur.i, %for.body4.ur.i ], [ %.ph, %for.body4.ur.i.preheader ] - %arrayidx.sum = add i32 %add.i, %20 - %arrayidx.ur.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum - %21 = load i8, i8* %arrayidx.ur.i, align 1 - %conv.ur.i = sext i8 %21 to i32 - store i32 %conv.ur.i, i32* @c, align 4 - %inc.ur.i = add nsw i32 %20, 1 - store i32 %inc.ur.i, i32* @b, align 4 - %tobool3.ur.i = icmp eq i32 %inc.ur.i, 0 - br i1 %tobool3.ur.i, label %for.inc5.i.loopexit, label %for.body4.ur.i - -for.inc5.i.loopexit: - br label %for.inc5.i - -for.inc5.i: - %inc6.i = add nsw i32 %storemerge.i, 1 - br label %for.cond2thread-pre-split.i -} diff --git a/llvm/test/Analysis/Delinearization/himeno_1.ll b/llvm/test/Analysis/Delinearization/himeno_1.ll --- a/llvm/test/Analysis/Delinearization/himeno_1.ll 
+++ b/llvm/test/Analysis/Delinearization/himeno_1.ll @@ -28,8 +28,8 @@ ; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> ; CHECK: Base offset: %a.base -; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes. -; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)][4] +; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>][0] %struct.Mat = type { float*, i32, i32, i32, i32 } diff --git a/llvm/test/Analysis/Delinearization/himeno_2.ll b/llvm/test/Analysis/Delinearization/himeno_2.ll --- a/llvm/test/Analysis/Delinearization/himeno_2.ll +++ b/llvm/test/Analysis/Delinearization/himeno_2.ll @@ -28,8 +28,8 @@ ; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> ; CHECK: Base offset: %a.base -; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes. 
-; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)][4] +; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>][0] %struct.Mat = type { float*, i32, i32, i32, i32 } diff --git a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll --- a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll +++ b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll @@ -10,10 +10,15 @@ ; AddRec: {{((%m * %b * 8) + %A),+,(2 * %m * 8)}<%for.i>,+,(2 * 8)}<%for.j> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>] +; CHECK: ArrayDecl[UnknownSize][%m][8] +; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>][0] +; A[2i+b][2j][0] +; A[(2*i+b)*m*8+2*j*8+0] +; A[2*i*m*8+b*m*8+2*j*8+0] +; A[i*m*16+b*m*8+j*16] + define void @foo(i64 %n, i64 %m, i64 %b, double* %A) { entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes. -; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>] +; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)][8] +; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>][0] define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -12,15 +12,15 @@ ; In Loop with Header: for.j ; AddRec: {{0,+,(%m * 8)}<%for.i>,+,8}<%for.j> ; Base offset: %A -; ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; ArrayDecl[UnknownSize][%m][8] +; ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][0] ; Inst: store double %val, double* %arrayidx ; In Loop with Header: for.j ; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; CHECK: ArrayDecl[UnknownSize][%m][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][0] define void @foo(i64 %n, i64 %m, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -10,8 +10,8 @@ ; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll @@ -9,8 +9,8 @@ ; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes. -; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll --- a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll +++ b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll @@ -6,8 +6,8 @@ ; A[i * (*p) + j] += i + j; ; } ; -; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes. 
-; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>] +; CHECK: ArrayDecl[UnknownSize][%pval][4] +; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>][0] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -983,6 +983,11 @@ return true; } +static bool isKnownLessOrEqual(ScalarEvolution &SE, const SCEV *LHS, + const SCEV *RHS) { + return SE.isKnownPredicate(ICmpInst::ICMP_SLE, LHS, RHS); +} + // We first store the resulting memory accesses in TempMemoryAccesses. Only // if the access functions for all memory accesses have been successfully // delinearized we continue. Otherwise, we either report a failure or, if @@ -1015,8 +1020,24 @@ } else { llvm::computeAccessFunctions(SE, AF, Acc->DelinearizedSubscripts, Shape->DelinearizedSizes); - if (Acc->DelinearizedSubscripts.size() == 0) + if (Acc->DelinearizedSubscripts.size() == 0) { IsNonAffine = true; + } else { + // TODO: Do not drop the last DelinearizedSubscripts. If non-zero it + // must be preserved for code-generation such that it represents the + // same pointer. 
+ const SCEV *AccOffset = Acc->DelinearizedSubscripts.pop_back_val(); + const SCEV *ArrSize = Shape->DelinearizedSizes.back(); + const SCEV *AccSize = Context.ElementSize[BasePointer]; + if (!AccOffset->isZero() || + !isKnownLessOrEqual(SE, AccSize, ArrSize)) { + LLVM_DEBUG(dbgs() + << "Accessed element bytes [" << *AccOffset << ",+" + << *AccSize << ") possibly outside element [0," + << *ArrSize << ")\n"); + IsNonAffine = true; + } + } } for (const SCEV *S : Acc->DelinearizedSubscripts) if (!isAffine(S, Scope, Context)) diff --git a/polly/test/ScopDetect/array_elt_byte_offset.ll b/polly/test/ScopDetect/array_elt_byte_offset.ll new file mode 100644 --- /dev/null +++ b/polly/test/ScopDetect/array_elt_byte_offset.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-detect -disable-output -debug < %s 2>&1 | FileCheck %s + +; CHECK: Accessed element bytes [{{.+}},+4) possibly outside element [0,4) +; REQUIRES: asserts + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.CCGFace = type { %struct.CCGFace*, i8*, i16, i16, i16, i16 } +%struct.CCGVert = type { %struct.CCGVert*, i8*, i16, i16, i16, i16, %struct.CCGEdge**, %struct.CCGFace** } +%struct.CCGEdge = type { %struct.CCGEdge*, i8*, i16, i16, float, %struct.CCGVert*, %struct.CCGVert*, %struct.CCGFace** } + +define void @ccgSubSurf__calcSubdivLevel() { +entry: + %shl.i1795 = shl nuw i32 1, undef + %i = load i32, i32* undef, align 8 + %shl.i.i = shl nuw i32 1, undef + %add.i.i = add nuw nsw i32 %shl.i.i, 1 + %mul.i = mul nsw i32 %add.i.i, %add.i.i + %add.i1797 = add nsw i32 %mul.i, %add.i.i + %smax = call i32 @llvm.smax.i32(i32 %shl.i1795, i32 1) + %i1 = sext i32 %add.i1797 to i64 + %i2 = sext i32 %i to i64 + %i3 = load %struct.CCGFace*, %struct.CCGFace** undef, align 8 + %i4 = load i16, i16* undef, align 8 + %arrayidx.i.i.i.i = getelementptr inbounds %struct.CCGFace, %struct.CCGFace* %i3, i64 1 + %i5 = bitcast %struct.CCGFace* %arrayidx.i.i.i.i to %struct.CCGVert** + 
%idxprom.i.i.i = sext i16 %i4 to i64 + %arrayidx.i.i.i = getelementptr inbounds %struct.CCGVert*, %struct.CCGVert** %i5, i64 %idxprom.i.i.i + %arrayidx2.i.i = getelementptr inbounds %struct.CCGVert*, %struct.CCGVert** %arrayidx.i.i.i, i64 %idxprom.i.i.i + %i6 = bitcast %struct.CCGVert** %arrayidx2.i.i to i8* + %i7 = load i32, i32* undef, align 4 + %wide.trip.count.i.us.us.us = zext i32 %i7 to i64 + br label %for.cond10.preheader.us.us.us + +for.cond10.preheader.us.us.us: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us ] + %i8 = mul nsw i64 %indvars.iv, %i1 + %i9 = add nsw i64 %i8, 1 + %i10 = mul nsw i64 %i9, %i2 + %add.ptr.i.us.us.us = getelementptr inbounds i8, i8* %i6, i64 %i10 + br label %for.cond14.preheader.us.us.us.us.us + +for.cond14.preheader.us.us.us.us.us: + br label %for.body18.us.us.us.us.us.us + +for.body18.us.us.us.us.us.us: + %x.04761.us.us.us.us.us.us = phi i32 [ 0, %for.cond14.preheader.us.us.us.us.us ], [ %add25.us.us.us.us.us.us, %VertDataAvg4.exit.loopexit.us.us.us.us.us.us ] + %mul8.i.us.us.us.us.us.us = shl i32 undef, undef + %add9.i.us.us.us.us.us.us = add nsw i32 %mul8.i.us.us.us.us.us.us, %add.i.i + %mul10.i.us.us.us.us.us.us = mul nsw i32 %add9.i.us.us.us.us.us.us, %i + %idxprom.i.us.us.us.us.us.us = sext i32 %mul10.i.us.us.us.us.us.us to i64 + %arrayidx.i.us.us.us.us.us.us = getelementptr inbounds i8, i8* %add.ptr.i.us.us.us, i64 %idxprom.i.us.us.us.us.us.us + %i11 = bitcast i8* %arrayidx.i.us.us.us.us.us.us to float* + %add25.us.us.us.us.us.us = add nuw nsw i32 %x.04761.us.us.us.us.us.us, 1 + br label %for.body.i.us.us.us.us.us.us + +for.body.i.us.us.us.us.us.us: + %indvars.iv.i.us.us.us.us.us.us = phi i64 [ 0, %for.body18.us.us.us.us.us.us ], [ %indvars.iv.next.i.us.us.us.us.us.us, %for.body.i.us.us.us.us.us.us ] + %arrayidx.i1890.us.us.us.us.us.us = getelementptr inbounds float, float* %i11, i64 %indvars.iv.i.us.us.us.us.us.us + %i12 = load float, float* 
%arrayidx.i1890.us.us.us.us.us.us, align 4 + %indvars.iv.next.i.us.us.us.us.us.us = add nuw nsw i64 %indvars.iv.i.us.us.us.us.us.us, 1 + %exitcond.not.i.us.us.us.us.us.us = icmp eq i64 %indvars.iv.next.i.us.us.us.us.us.us, %wide.trip.count.i.us.us.us + br i1 %exitcond.not.i.us.us.us.us.us.us, label %VertDataAvg4.exit.loopexit.us.us.us.us.us.us, label %for.body.i.us.us.us.us.us.us + +VertDataAvg4.exit.loopexit.us.us.us.us.us.us: + %exitcond.not = icmp eq i32 %add25.us.us.us.us.us.us, %smax + br i1 %exitcond.not, label %for.cond14.for.inc35_crit_edge.split.us.us.us.us.us.us, label %for.body18.us.us.us.us.us.us + +for.cond14.for.inc35_crit_edge.split.us.us.us.us.us.us: + br i1 undef, label %for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us, label %for.cond14.preheader.us.us.us.us.us + +for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond10.preheader.us.us.us +} + +declare i32 @llvm.smax.i32(i32, i32) +