Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -650,6 +650,21 @@ /// @param SAI Info object for the accessed array. void buildAccessRelation(const ScopArrayInfo *SAI); + /// @brief Carry index overflows of dimensions with constant size to the next + /// higher dimension. + /// + /// For dimensions that have constant size, modulo the index by the size and + /// add up the carry (floored division) to the next higher dimension. This is + /// how overflow is defined in row-major order. + /// It happens e.g. when ScalarEvolution computes the offset to the base + /// pointer and would algebraically sum up all lower dimensions' indices of + /// constant size. + /// + /// Example: + /// float (*A)[4]; + /// A[1][6] -> A[2][2] + void wrapConstantDimensions(); + public: /// @brief Create a new MemoryAccess. /// Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -289,6 +289,58 @@ return SAI; } +void MemoryAccess::wrapConstantDimensions() { + auto *SAI = getScopArrayInfo(); + auto *ArraySpace = SAI->getSpace(); + auto *Ctx = isl_space_get_ctx(ArraySpace); + unsigned DimsArray = SAI->getNumberOfDimensions(); + + auto *DivModAff = isl_multi_aff_identity(isl_space_map_from_domain_and_range( + isl_space_copy(ArraySpace), isl_space_copy(ArraySpace))); + auto *LArraySpace = isl_local_space_from_space(ArraySpace); + + // Begin with last dimension, to iteratively carry into higher dimensions. + for (int i = DimsArray - 1; i > 0; i--) { + auto *DimSize = SAI->getDimensionSize(i); + auto *DimSizeCst = dyn_cast(DimSize); + + // This transformation is not applicable to dimensions with dynamic size. + if (!DimSizeCst) + continue; + + auto *DimSizeVal = isl_valFromAPInt(Ctx, DimSizeCst->getAPInt(), false); + auto *Var = isl_aff_var_on_domain(isl_local_space_copy(LArraySpace), + isl_dim_set, i); + auto *PrevVar = isl_aff_var_on_domain(isl_local_space_copy(LArraySpace), + isl_dim_set, i - 1); + + // Compute: index % size + // Modulo must apply in the divide of the previous iteration, if any. + auto *Modulo = isl_aff_copy(Var); + Modulo = isl_aff_mod_val(Modulo, isl_val_copy(DimSizeVal)); + Modulo = isl_aff_pullback_multi_aff(Modulo, isl_multi_aff_copy(DivModAff)); + + // Compute: floor(index / size) + auto *Divide = Var; + Divide = isl_aff_div( + Divide, + isl_aff_val_on_domain(isl_local_space_copy(LArraySpace), DimSizeVal)); + Divide = isl_aff_floor(Divide); + Divide = isl_aff_add(Divide, PrevVar); + Divide = isl_aff_pullback_multi_aff(Divide, isl_multi_aff_copy(DivModAff)); + + // Apply Modulo and Divide. + DivModAff = isl_multi_aff_set_aff(DivModAff, i, Modulo); + DivModAff = isl_multi_aff_set_aff(DivModAff, i - 1, Divide); + } + + // Apply all modulo/divides on the accesses. + AccessRelation = + isl_map_apply_range(AccessRelation, isl_map_from_multi_aff(DivModAff)); + AccessRelation = isl_map_detect_equalities(AccessRelation); + isl_local_space_free(LArraySpace); +} + void MemoryAccess::updateDimensionality() { auto *SAI = getScopArrayInfo(); auto *ArraySpace = SAI->getSpace(); @@ -331,6 +383,14 @@ AccessRelation = isl_map_floordiv_val(AccessRelation, V); } + // We currently do this only if we added at least one dimension, which means + // some dimension's indices have not been specified, an indicator that some + // index values have been added together. + // TODO: Investigate general usefulness; Effect on unit tests is to make index + // expressions more complicated. + if (DimsMissing) + wrapConstantDimensions(); + if (!isAffine()) computeBoundsOnAccessRelation(ArrayElemSize); Index: polly/trunk/test/ScopInfo/multidim_fixedsize_multi_offset.ll =================================================================== --- polly/trunk/test/ScopInfo/multidim_fixedsize_multi_offset.ll +++ polly/trunk/test/ScopInfo/multidim_fixedsize_multi_offset.ll @@ -1,8 +1,20 @@ -; RUN: opt %loadPolly -polly-detect -analyze < %s | FileCheck %s -check-prefix=DETECT ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; -; DETECT: Valid Region for Scop: for.cond => for.end -; CHECK-NOT: Region: %for.cond---%for.end +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_body[i0] : 0 <= i0 <= 99 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_body[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[1 + i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[1 + i0, 0] }; +; CHECK-NEXT: } ; ; void f(int A[][2]) { ; int(*B)[2] = &A[0][0]; @@ -13,11 +25,8 @@ ; } ; } ; -; This test case makes sure we do not miss parts of the array subscript -; functions, which we previously did by only considering the first of a chain -; of GEP instructions. Today, we detect this situation and do not delinearize -; the relevant memory access. In the future, we may want to detect this -; pattern and combine multiple GEP functions together. +; Verify that the additional offset to A by accessing it through C is taken into +; account. ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/ScopInfo/process_added_dimensions.ll =================================================================== --- polly/trunk/test/ScopInfo/process_added_dimensions.ll +++ polly/trunk/test/ScopInfo/process_added_dimensions.ll @@ -1,20 +1,32 @@ -; RUN: opt %loadPolly -polly-detect -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s -; This test case produces the following memory access which we try hoist: -; { Stmt_for_cond40_preheader_5[i0] -> MemRef_call[0, 0, 2240] }. However, it -; accesses an array of size "i32 MemRef_call[*][6][64]". That is why we -; should turn the whole SCoP into an invalid SCoP using corresponding bounds -; checks. Otherwise, we derive the incorrect access. - -; CHECK: Valid Region for Scop: for.cond40.preheader.4 => for.end76 -; CHECK-NOT: Region: %for.cond40.preheader.4---%for.end76 +; CHECK: Invariant Accesses: { +; CHECK-NEXT: } +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_cond40_preheader_4 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_cond40_preheader_4[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_cond40_preheader_4[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_cond40_preheader_4[i0] -> MemRef_call[5, 5, 0] }; +; CHECK-NEXT: Stmt_for_cond40_preheader_5 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_for_cond40_preheader_5[i0] : 0 <= i0 <= 1 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_for_cond40_preheader_5[i0] -> [i0, 1] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_cond40_preheader_5[i0] -> MemRef_call[5, 5, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_for_cond40_preheader_5[i0] -> MemRef__pre160[] }; +; CHECK-NEXT: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" declare noalias i8* @malloc() -define void @main() { +define i32 @main() { entry: %call = tail call noalias i8* @malloc() %0 = bitcast i8* %call to [6 x [6 x [64 x i32]]]* @@ -23,7 +35,7 @@ br label %for.cond40.preheader.4 for.end76: ; preds = %for.inc71.5 - ret void + ret i32 %.pre160 for.cond40.preheader.4: ; preds = %for.inc71.5, %entry %t.0131 = phi i32 [ 0, %entry ], [ %inc75, %for.inc71.5 ]