diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1113,11 +1113,12 @@ void collectParametricTerms(const SCEV *Expr, SmallVectorImpl &Terms); - /// Return in Subscripts the access functions for each dimension in Sizes - /// (third step of delinearization). + /// Return in Subscripts the access functions for each dimension in Sizes plus + /// for an outermost dimension of unknown size (third step of + /// delinearization). void computeAccessFunctions(const SCEV *Expr, SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes); + ArrayRef Sizes); /// Gathers the individual index expressions from a GEP instruction. /// @@ -1157,7 +1158,7 @@ /// /// Example: /// - /// A[][n][m] + /// float A[][n][m] /// /// for i /// for j @@ -1167,6 +1168,8 @@ /// The initial SCEV: /// /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] + /// (Note: The SCEV of a pointer is in bytes, here we use the argument + /// of the GetElementPtr for illustration) /// /// 1. Find the different terms in the step functions: /// -> [2*m, 5, n*m, n*m] @@ -1195,7 +1198,9 @@ /// /// The subscript of the outermost dimension is the Quotient: [j+k]. /// - /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. + /// Overall, for the address of a memory access, we have: A[][n][m][4], and + /// the access function: A[j+k][2i][5i][0] where [4] is the array element size + /// in bytes and [0] is the byte offset into this element. 
void delinearize(const SCEV *Expr, SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes, const SCEV *ElementSize); diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -86,21 +86,20 @@ SmallVector Subscripts, Sizes; SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst)); if (Subscripts.size() == 0 || Sizes.size() == 0 || - Subscripts.size() != Sizes.size()) { + Subscripts.size() != Sizes.size() + 1) { O << "failed to delinearize\n"; continue; } O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; - int Size = Subscripts.size(); - for (int i = 0; i < Size - 1; i++) - O << "[" << *Sizes[i] << "]"; - O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + for (const SCEV *Size : Sizes) + O << "[" << *Size << "]"; + O << "\n"; O << "ArrayRef"; - for (int i = 0; i < Size; i++) - O << "[" << *Subscripts[i] << "]"; + for (const SCEV *Subscript : Subscripts) + O << "[" << *Subscript << "]"; O << "\n"; } } diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3451,7 +3451,7 @@ SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes); // Fail when there is only a subscript: that's a linearized access function. - if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || + if (SrcSubscripts.size() <= 2 || DstSubscripts.size() <= 2 || SrcSubscripts.size() != DstSubscripts.size()) return false; diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp --- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp +++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp @@ -347,15 +347,21 @@ SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(&StoreOrLoadInst)); + // Eliminate the innermost subscript (for the byte offset to the array + // element). 
LoopCacheAnalysis assumes it is zero. + if (!Subscripts.empty() && Subscripts.back()->isZero()) + Subscripts.pop_back(); + if (Subscripts.empty() || Sizes.empty() || Subscripts.size() != Sizes.size()) { + Subscripts.clear(); + Sizes.clear(); + // Attempt to determine whether we have a single dimensional array access. // before giving up. if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) { LLVM_DEBUG(dbgs().indent(2) << "ERROR: failed to delinearize reference\n"); - Subscripts.clear(); - Sizes.clear(); return false; } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12467,7 +12467,7 @@ void ScalarEvolution::computeAccessFunctions( const SCEV *Expr, SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes) { + ArrayRef Sizes) { // Early exit in case this SCEV is not an affine multivariate function. if (Sizes.empty()) return; @@ -12492,20 +12492,6 @@ Res = Q; - // Do not record the last subscript corresponding to the size of elements in - // the array. - if (i == Last) { - - // Bail out if the remainder is too complex. - if (isa(R)) { - Subscripts.clear(); - Sizes.clear(); - return; - } - - continue; - } - // Record the access function for the current subscript. Subscripts.push_back(R); } @@ -12554,7 +12540,7 @@ /// and then SCEV->delinearize determines the size of some of the dimensions of /// the array as these are the multiples by which the strides are happening: /// -/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes. 
+/// CHECK: ArrayDecl[UnknownSize][%m][%o][sizeof(double)] /// /// Note that the outermost dimension remains of UnknownSize because there are /// no strides that would help identifying the size of the last dimension: when @@ -12565,7 +12551,7 @@ /// Finally delinearize provides the access functions for the array reference /// that does correspond to A[i][j][k] of the above C testcase: /// -/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] /// /// The testcases are checking the output of a function pass: /// DelinearizationPass that walks through all loads and stores of a function diff --git a/llvm/test/Analysis/Delinearization/a.ll b/llvm/test/Analysis/Delinearization/a.ll --- a/llvm/test/Analysis/Delinearization/a.ll +++ b/llvm/test/Analysis/Delinearization/a.ll @@ -10,8 +10,8 @@ ; AddRec: {{{(28 + (4 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes. 
-; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][4] +; CHECK: ArrayRef[{3,+,2}<%for.i>][{-4,+,3}<%for.j>][{7,+,5}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, i32* nocapture %A) #0 { entry: diff --git a/llvm/test/Analysis/Delinearization/byte_offset.ll b/llvm/test/Analysis/Delinearization/byte_offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/Delinearization/byte_offset.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -analyze -enable-new-pm=0 -delinearize | FileCheck %s +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s + +; CHECK: Inst: store float 0.000000e+00, float* %arrayidx.i4563, align 4 +; CHECK: In Loop with Header: for.body.i4567 +; CHECK: AccessFunction: {((sext i32 %mul7.i4534 to i64) + {(24 + (sext i32 %i1 to i64) + (16 * (sext i16 %i401 to i64))),+,((sext i32 (1 + ((1 + %shl.i.i) * (1 + %shl.i.i)) + %shl.i.i) to i64) * (sext i32 %i1 to i64))}<%for.body703>),+,4}<%for.body.i4567> +; CHECK: Base offset: %i400 +; CHECK: ArrayDecl[UnknownSize][((sext i32 (1 + ((1 + %shl.i.i) * (1 + %shl.i.i)) + %shl.i.i) to i64) * (sext i32 %i1 to i64))][4] +; CHECK: ArrayRef[0][{(6 + (4 * (sext i16 %i401 to i64))),+,1}<%for.body.i4567>][((sext i32 %mul7.i4534 to i64) + {(sext i32 %i1 to i64),+,((sext i32 (1 + ((1 + %shl.i.i) * (1 + %shl.i.i)) + %shl.i.i) to i64) * (sext i32 %i1 to i64))}<%for.body703>)] + +%struct.S1 = type { %struct.S1*, i8*, i16, i16, i16, i16 } +%struct.S2 = type { %struct.S2*, i8*, i16, i16, i16, i16, %struct.S3**, %struct.S1** } +%struct.S3 = type { %struct.S3*, i8*, i16, i16, float, %struct.S2*, %struct.S2*, %struct.S1** } + +define void @foo() { +entry: + %i1 = load i32, i32* undef, align 8 + %shl.i.i = shl nuw i32 1, undef + %add.i.i = add nuw nsw i32 %shl.i.i, 1 + %mul.i = mul nsw i32 %add.i.i, %add.i.i + %add.i1797 = add nsw i32 %mul.i, %add.i.i + %i398 = sext i32 %add.i1797 to i64 + %i399 = sext i32 %i1 to i64 + %i400 = load %struct.S1*, 
%struct.S1** undef, align 8 + %i401 = load i16, i16* undef, align 8 + %arrayidx.i.i4361 = getelementptr inbounds %struct.S1, %struct.S1* %i400, i64 1 + %i402 = bitcast %struct.S1* %arrayidx.i.i4361 to %struct.S2** + %idxprom.i4363 = sext i16 %i401 to i64 + %arrayidx.i4364 = getelementptr inbounds %struct.S2*, %struct.S2** %i402, i64 %idxprom.i4363 + %arrayidx2.i.i4377 = getelementptr inbounds %struct.S2*, %struct.S2** %arrayidx.i4364, i64 %idxprom.i4363 + %i403 = bitcast %struct.S2** %arrayidx2.i.i4377 to i8* + br label %for.body703 + +for.body703: ; preds = %for.end767, %entry + %indvars.iv5141 = phi i64 [ 0, %entry ], [ %indvars.iv.next5142, %for.end767 ] + %i412 = mul nsw i64 %indvars.iv5141, %i398 + %i413 = add nsw i64 %i412, 1 + %i414 = mul nsw i64 %i413, %i399 + %add.ptr.i4384 = getelementptr inbounds i8, i8* %i403, i64 %i414 + br i1 undef, label %for.body736, label %for.end767 + +for.body736: ; preds = %for.body703 + %mul7.i4534 = shl i32 undef, undef + %idxprom.i4535 = sext i32 %mul7.i4534 to i64 + %arrayidx.i4536 = getelementptr inbounds i8, i8* %add.ptr.i4384, i64 %idxprom.i4535 + %i438 = bitcast i8* %arrayidx.i4536 to float* + br label %for.body.i4567 + +for.body.i4567: ; preds = %for.body.i4567, %for.body736 + %indvars.iv.i4562 = phi i64 [ %indvars.iv.next.i4565, %for.body.i4567 ], [ 0, %for.body736 ] + %arrayidx.i4563 = getelementptr inbounds float, float* %i438, i64 %indvars.iv.i4562 + store float 0.0, float* %arrayidx.i4563, align 4 + %indvars.iv.next.i4565 = add nuw nsw i64 %indvars.iv.i4562, 1 + br label %for.body.i4567 + +for.end767: ; preds = %for.body703 + %indvars.iv.next5142 = add nuw nsw i64 %indvars.iv5141, 1 + br label %for.body703 +} diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll --- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll +++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -7,15 +7,15 
@@ ; CHECK-NEXT: In Loop with Header: for.inc ; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<%for.inc> ; CHECK-NEXT: Base offset: %A -; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. -; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>] +; CHECK-NEXT: ArrayDecl[UnknownSize][%N][4] +; CHECK-NEXT: ArrayRef[%call][{0,+,1}<%for.inc>][0] ; CHECK: Inst: %tmp5 = load float, float* %arrayidx4, align 4 ; CHECK-NEXT: In Loop with Header: for.inc ; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc> ; CHECK-NEXT: Base offset: %B -; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. -; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1] +; CHECK-NEXT: ArrayDecl[UnknownSize][%N][4] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.inc>][%call1][0] ; Function Attrs: noinline nounwind uwtable define void @mat_mul(float* %C, float* %A, float* %B, i64 %N) #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 { diff --git a/llvm/test/Analysis/Delinearization/divide_by_one.ll b/llvm/test/Analysis/Delinearization/divide_by_one.ll --- a/llvm/test/Analysis/Delinearization/divide_by_one.ll +++ b/llvm/test/Analysis/Delinearization/divide_by_one.ll @@ -14,14 +14,14 @@ ; AddRec: {{(-1 + ((1 + %bs) * %stride)),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<%for.body3> ; CHECK: Inst: %0 = load i8, i8* %arrayidx, align 1 ; CHECK: Base offset: %dst -; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes. -; CHECK: ArrayRef[{(1 + %bs),+,-1}<%for.cond1.preheader>][{-1,+,1}<%for.body3>] +; CHECK: ArrayDecl[UnknownSize][%stride][1] +; CHECK: ArrayRef[{(1 + %bs),+,-1}<%for.cond1.preheader>][{-1,+,1}<%for.body3>][0] ; AddRec: {{(%stride * %bs),+,(-1 * %stride)}<%for.cond1.preheader>,+,1}<%for.body3> ; CHECK: Inst: store i8 %0, i8* %arrayidx7, align 1 ; CHECK: Base offset: %dst -; CHECK: ArrayDecl[UnknownSize][%stride] with elements of 1 bytes. 
-; CHECK: ArrayRef[{%bs,+,-1}<%for.cond1.preheader>][{0,+,1}<%for.body3>] +; CHECK: ArrayDecl[UnknownSize][%stride][1] +; CHECK: ArrayRef[{%bs,+,-1}<%for.cond1.preheader>][{0,+,1}<%for.body3>][0] define void @test(i8* nocapture %dst, i32 %stride, i32 %bs) { entry: diff --git a/llvm/test/Analysis/Delinearization/himeno_1.ll b/llvm/test/Analysis/Delinearization/himeno_1.ll --- a/llvm/test/Analysis/Delinearization/himeno_1.ll +++ b/llvm/test/Analysis/Delinearization/himeno_1.ll @@ -29,8 +29,8 @@ ; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> ; CHECK: Base offset: %a.base -; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes. -; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)][4] +; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>][0] %struct.Mat = type { float*, i32, i32, i32, i32 } diff --git a/llvm/test/Analysis/Delinearization/himeno_2.ll b/llvm/test/Analysis/Delinearization/himeno_2.ll --- a/llvm/test/Analysis/Delinearization/himeno_2.ll +++ b/llvm/test/Analysis/Delinearization/himeno_2.ll @@ -29,8 +29,8 @@ ; AddRec: {{{(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))) + %a.base),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))}<%for.j>,+,4}<%for.k> ; CHECK: Base offset: %a.base -; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes. 
-; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)][4] +; CHECK: ArrayRef[{1,+,1}<%for.i>][{1,+,1}<%for.j>][{1,+,1}<%for.k>][0] %struct.Mat = type { float*, i32, i32, i32, i32 } diff --git a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll --- a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll +++ b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll @@ -11,8 +11,8 @@ ; AddRec: {{((%m * %b * 8) + %A),+,(2 * %m * 8)}<%for.i>,+,(2 * 8)}<%for.j> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>] +; CHECK: ArrayDecl[UnknownSize][%m][8] +; CHECK: ArrayRef[{%b,+,2}<%for.i>][{0,+,2}<%for.j>][0] define void @foo(i64 %n, i64 %m, i64 %b, double* %A) { diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll @@ -11,8 +11,8 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * %o) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{3,+,1}<%for.i>][{-4,+,1}<%for.j>][{7,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll @@ -11,8 +11,8 @@ ; AddRec: {{{(56 + (8 * (-4 + (3 * %m)) * (%o + %p)) + %A),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes. -; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>] +; CHECK: ArrayDecl[UnknownSize][%m][(%o + %p)][8] +; CHECK: ArrayRef[{3,+,1}<%for.cond4.preheader.lr.ph.us>][{-4,+,1}<%for.body6.lr.ph.us.us>][{7,+,1}<%for.body6.us.us>][0] define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, double* nocapture %A) nounwind uwtable { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll @@ -11,8 +11,8 @@ ; AddRec: {{{((8 * ((((%m * %p) + %q) * %o) + %r)) + %A),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{%p,+,1}<%for.i>][{%q,+,1}<%for.j>][{%r,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A, i64 %p, i64 %q, i64 %r) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -13,15 +13,15 @@ ; In Loop with Header: for.j ; AddRec: {{0,+,(%m * 8)}<%for.i>,+,8}<%for.j> ; Base offset: %A -; ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; ArrayDecl[UnknownSize][%m][8] +; ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][0] ; Inst: store double %val, double* %arrayidx ; In Loop with Header: for.j ; AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. -; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; CHECK: ArrayDecl[UnknownSize][%m][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][0] define void @foo(i64 %n, i64 %m, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -11,8 +11,8 @@ ; AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. 
-; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][%m][%o][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] define void @foo(i64 %n, i64 %m, i64 %o, double* %A) { entry: diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll @@ -10,8 +10,8 @@ ; AddRec: {{{%A,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))}<%for.j>,+,8}<%for.k> ; CHECK: Base offset: %A -; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes. -; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>] +; CHECK: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)][8] +; CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>][0] target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll --- a/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll +++ b/llvm/test/Analysis/Delinearization/parameter_addrec_product.ll @@ -7,8 +7,8 @@ ; A[i * (*p) + j] += i + j; ; } ; -; CHECK: ArrayDecl[UnknownSize][%pval] with elements of 4 bytes. 
-; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>] +; CHECK: ArrayDecl[UnknownSize][%pval][4] +; CHECK: ArrayRef[{0,+,1}<%bb2>][{0,+,1}<%bb4>][0] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -982,6 +982,11 @@ return true; } +static bool isKnownLessOrEqual(ScalarEvolution &SE, const SCEV *LHS, + const SCEV *RHS) { + return SE.isKnownPredicate(ICmpInst::ICMP_SLE, LHS, RHS); +} + // We first store the resulting memory accesses in TempMemoryAccesses. Only // if the access functions for all memory accesses have been successfully // delinearized we continue. Otherwise, we either report a failure or, if @@ -1014,8 +1019,24 @@ } else { SE.computeAccessFunctions(AF, Acc->DelinearizedSubscripts, Shape->DelinearizedSizes); - if (Acc->DelinearizedSubscripts.size() == 0) + if (Acc->DelinearizedSubscripts.size() == 0) { IsNonAffine = true; + } else { + // TODO: Do not drop the last DelinearizedSubscripts. If non-zero it + // must be preserved for code-generation such that it represents the + // same pointer. 
+ const SCEV *AccOffset = Acc->DelinearizedSubscripts.pop_back_val(); + const SCEV *ArrSize = Shape->DelinearizedSizes.back(); + const SCEV *AccSize = Context.ElementSize[BasePointer]; + if (!AccOffset->isZero() || + !isKnownLessOrEqual(SE, AccSize, ArrSize)) { + LLVM_DEBUG(dbgs() + << "Accessed element bytes [" << *AccOffset << ",+" + << *AccSize << ") possibly outside element [0," + << *ArrSize << ")\n"); + IsNonAffine = true; + } + } } for (const SCEV *S : Acc->DelinearizedSubscripts) if (!isAffine(S, Scope, Context)) diff --git a/polly/test/ScopDetect/array_elt_byte_offset.ll b/polly/test/ScopDetect/array_elt_byte_offset.ll new file mode 100644 --- /dev/null +++ b/polly/test/ScopDetect/array_elt_byte_offset.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-detect -disable-output -debug < %s 2>&1 | FileCheck %s + +; CHECK: Accessed element bytes [{{.+}},+4) possibly outside element [0,4) +; REQUIRES: asserts + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.CCGFace = type { %struct.CCGFace*, i8*, i16, i16, i16, i16 } +%struct.CCGVert = type { %struct.CCGVert*, i8*, i16, i16, i16, i16, %struct.CCGEdge**, %struct.CCGFace** } +%struct.CCGEdge = type { %struct.CCGEdge*, i8*, i16, i16, float, %struct.CCGVert*, %struct.CCGVert*, %struct.CCGFace** } + +define void @ccgSubSurf__calcSubdivLevel() { +entry: + %shl.i1795 = shl nuw i32 1, undef + %i = load i32, i32* undef, align 8 + %shl.i.i = shl nuw i32 1, undef + %add.i.i = add nuw nsw i32 %shl.i.i, 1 + %mul.i = mul nsw i32 %add.i.i, %add.i.i + %add.i1797 = add nsw i32 %mul.i, %add.i.i + %smax = call i32 @llvm.smax.i32(i32 %shl.i1795, i32 1) + %i1 = sext i32 %add.i1797 to i64 + %i2 = sext i32 %i to i64 + %i3 = load %struct.CCGFace*, %struct.CCGFace** undef, align 8 + %i4 = load i16, i16* undef, align 8 + %arrayidx.i.i.i.i = getelementptr inbounds %struct.CCGFace, %struct.CCGFace* %i3, i64 1 + %i5 = bitcast %struct.CCGFace* %arrayidx.i.i.i.i to %struct.CCGVert** + 
%idxprom.i.i.i = sext i16 %i4 to i64 + %arrayidx.i.i.i = getelementptr inbounds %struct.CCGVert*, %struct.CCGVert** %i5, i64 %idxprom.i.i.i + %arrayidx2.i.i = getelementptr inbounds %struct.CCGVert*, %struct.CCGVert** %arrayidx.i.i.i, i64 %idxprom.i.i.i + %i6 = bitcast %struct.CCGVert** %arrayidx2.i.i to i8* + %i7 = load i32, i32* undef, align 4 + %wide.trip.count.i.us.us.us = zext i32 %i7 to i64 + br label %for.cond10.preheader.us.us.us + +for.cond10.preheader.us.us.us: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us ] + %i8 = mul nsw i64 %indvars.iv, %i1 + %i9 = add nsw i64 %i8, 1 + %i10 = mul nsw i64 %i9, %i2 + %add.ptr.i.us.us.us = getelementptr inbounds i8, i8* %i6, i64 %i10 + br label %for.cond14.preheader.us.us.us.us.us + +for.cond14.preheader.us.us.us.us.us: + br label %for.body18.us.us.us.us.us.us + +for.body18.us.us.us.us.us.us: + %x.04761.us.us.us.us.us.us = phi i32 [ 0, %for.cond14.preheader.us.us.us.us.us ], [ %add25.us.us.us.us.us.us, %VertDataAvg4.exit.loopexit.us.us.us.us.us.us ] + %mul8.i.us.us.us.us.us.us = shl i32 undef, undef + %add9.i.us.us.us.us.us.us = add nsw i32 %mul8.i.us.us.us.us.us.us, %add.i.i + %mul10.i.us.us.us.us.us.us = mul nsw i32 %add9.i.us.us.us.us.us.us, %i + %idxprom.i.us.us.us.us.us.us = sext i32 %mul10.i.us.us.us.us.us.us to i64 + %arrayidx.i.us.us.us.us.us.us = getelementptr inbounds i8, i8* %add.ptr.i.us.us.us, i64 %idxprom.i.us.us.us.us.us.us + %i11 = bitcast i8* %arrayidx.i.us.us.us.us.us.us to float* + %add25.us.us.us.us.us.us = add nuw nsw i32 %x.04761.us.us.us.us.us.us, 1 + br label %for.body.i.us.us.us.us.us.us + +for.body.i.us.us.us.us.us.us: + %indvars.iv.i.us.us.us.us.us.us = phi i64 [ 0, %for.body18.us.us.us.us.us.us ], [ %indvars.iv.next.i.us.us.us.us.us.us, %for.body.i.us.us.us.us.us.us ] + %arrayidx.i1890.us.us.us.us.us.us = getelementptr inbounds float, float* %i11, i64 %indvars.iv.i.us.us.us.us.us.us + %i12 = load float, float* 
%arrayidx.i1890.us.us.us.us.us.us, align 4 + %indvars.iv.next.i.us.us.us.us.us.us = add nuw nsw i64 %indvars.iv.i.us.us.us.us.us.us, 1 + %exitcond.not.i.us.us.us.us.us.us = icmp eq i64 %indvars.iv.next.i.us.us.us.us.us.us, %wide.trip.count.i.us.us.us + br i1 %exitcond.not.i.us.us.us.us.us.us, label %VertDataAvg4.exit.loopexit.us.us.us.us.us.us, label %for.body.i.us.us.us.us.us.us + +VertDataAvg4.exit.loopexit.us.us.us.us.us.us: + %exitcond.not = icmp eq i32 %add25.us.us.us.us.us.us, %smax + br i1 %exitcond.not, label %for.cond14.for.inc35_crit_edge.split.us.us.us.us.us.us, label %for.body18.us.us.us.us.us.us + +for.cond14.for.inc35_crit_edge.split.us.us.us.us.us.us: + br i1 undef, label %for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us, label %for.cond14.preheader.us.us.us.us.us + +for.cond10.for.inc38_crit_edge.split.us.split.us.us.us.us: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond10.preheader.us.us.us +} + +declare i32 @llvm.smax.i32(i32, i32) +