Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -74,7 +74,6 @@ ALIASING, INBOUNDS, WRAPPING, - ALIGNMENT, ERRORBLOCK, INFINITELOOP, INVARIANTLOAD, @@ -761,6 +760,9 @@ /// @brief Return the access instruction of this memory access. Instruction *getAccessInstruction() const { return AccessInstruction; } + /// @brief Return the access function subscript in the dimension @p Dim. + const SCEV *getSubscript(unsigned Dim) const { return Subscripts[Dim]; } + /// Get the stride of this memory access in the specified Schedule. Schedule /// is a map from the statement to a schedule where the innermost dimension is /// the dimension of the innermost loop containing the statement. Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -294,13 +294,17 @@ } void MemoryAccess::updateDimensionality() { - auto ArraySpace = getScopArrayInfo()->getSpace(); + auto *SAI = getScopArrayInfo(); + auto ArraySpace = SAI->getSpace(); auto AccessSpace = isl_space_range(isl_map_get_space(AccessRelation)); + auto *Ctx = isl_space_get_ctx(AccessSpace); auto DimsArray = isl_space_dim(ArraySpace, isl_dim_set); auto DimsAccess = isl_space_dim(AccessSpace, isl_dim_set); auto DimsMissing = DimsArray - DimsAccess; + unsigned ArrayElemSize = SAI->getElemSizeInBytes(); + auto Map = isl_map_from_domain_and_range( isl_set_universe(AccessSpace), isl_set_universe(isl_space_copy(ArraySpace))); @@ -313,12 +317,29 @@ AccessRelation = isl_map_apply_range(AccessRelation, Map); + // For the non delinearized arrays, divide the access function of the last + // subscript by the size of the elements in the array. + // + // A stride one array access in C expressed as A[i] is expressed in + // LLVM-IR as something like A[i * elementsize]. This hides the fact that + // two subsequent values of 'i' index two values that are stored next to + // each other in memory. By this division we make this characteristic + // obvious again. If the base pointer was accessed with offsets not divisible + // by the accesses element size, we will have choosen a smaller ArrayElemSize + // that divides the offsets of all accesses to this base pointer. + if (DimsAccess == 1) { + isl_val *V = isl_val_int_from_si(Ctx, ArrayElemSize); + AccessRelation = isl_map_floordiv_val(AccessRelation, V); + } + + if (!isAffine()) + computeBoundsOnAccessRelation(ArrayElemSize); + // Introduce multi-element accesses in case the type loaded by this memory // access is larger than the canonical element type of the array. // // An access ((float *)A)[i] to an array char *A is modeled as // {[i] -> A[o] : 4 i <= o <= 4 i + 3 - unsigned ArrayElemSize = getScopArrayInfo()->getElemSizeInBytes(); if (ElemBytes > ArrayElemSize) { assert(ElemBytes % ArrayElemSize == 0 && "Loaded element size should be multiple of canonical element size"); @@ -328,24 +349,20 @@ for (unsigned i = 0; i < DimsArray - 1; i++) Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i); - isl_ctx *Ctx; isl_constraint *C; isl_local_space *LS; LS = isl_local_space_from_space(isl_map_get_space(Map)); - Ctx = isl_map_get_ctx(Map); int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes(); C = isl_constraint_alloc_inequality(isl_local_space_copy(LS)); C = isl_constraint_set_constant_val(C, isl_val_int_from_si(Ctx, Num - 1)); - C = isl_constraint_set_coefficient_si(C, isl_dim_in, - DimsArray - 1 - DimsMissing, Num); + C = isl_constraint_set_coefficient_si(C, isl_dim_in, DimsArray - 1, 1); C = isl_constraint_set_coefficient_si(C, isl_dim_out, DimsArray - 1, -1); Map = isl_map_add_constraint(Map, C); C = isl_constraint_alloc_inequality(LS); - C = isl_constraint_set_coefficient_si(C, isl_dim_in, - DimsArray - 1 - DimsMissing, -Num); + C = isl_constraint_set_coefficient_si(C, isl_dim_in, DimsArray - 1, -1); C = isl_constraint_set_coefficient_si(C, isl_dim_out, DimsArray - 1, 1); C = isl_constraint_set_constant_val(C, isl_val_int_from_si(Ctx, 0)); Map = isl_map_add_constraint(Map, C); @@ -681,6 +698,8 @@ /// @brief Check if @p Expr is divisible by @p Size. static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) { + if (Size == 1) + return true; // Only one factor needs to be divisible. if (auto *MulExpr = dyn_cast(Expr)) { @@ -719,37 +738,15 @@ AccessRelation = isl_map_from_basic_map(createBasicAccessMap(Statement)); AccessRelation = isl_map_set_tuple_id(AccessRelation, isl_dim_out, BaseAddrId); - - computeBoundsOnAccessRelation(getElemSizeInBytes()); return; } - Scop &S = *getStatement()->getParent(); isl_space *Space = isl_space_alloc(Ctx, 0, Statement->getNumIterators(), 0); AccessRelation = isl_map_universe(Space); for (int i = 0, Size = Subscripts.size(); i < Size; ++i) { isl_pw_aff *Affine = Statement->getPwAff(Subscripts[i]); - - if (Size == 1) { - // For the non delinearized arrays, divide the access function of the last - // subscript by the size of the elements in the array. - // - // A stride one array access in C expressed as A[i] is expressed in - // LLVM-IR as something like A[i * elementsize]. This hides the fact that - // two subsequent values of 'i' index two values that are stored next to - // each other in memory. By this division we make this characteristic - // obvious again. However, if the index is not divisible by the element - // size we will bail out. - isl_val *v = isl_val_int_from_si(Ctx, getElemSizeInBytes()); - Affine = isl_pw_aff_scale_down_val(Affine, v); - - if (!isDivisible(Subscripts[0], getElemSizeInBytes(), *S.getSE())) - S.invalidate(ALIGNMENT, AccessInstruction->getDebugLoc()); - } - isl_map *SubscriptMap = isl_map_from_pw_aff(Affine); - AccessRelation = isl_map_flat_range_product(AccessRelation, SubscriptMap); } @@ -2812,6 +2809,24 @@ } void Scop::updateAccessDimensionality() { + // Check all array accesses for each base pointer and find a (virtual) element + // size for the base pointer that divides all access functions. + for (auto &Stmt : *this) + for (auto *Access : Stmt) { + if (!Access->isArrayKind()) + continue; + auto &SAI = ScopArrayInfoMap[std::make_pair(Access->getBaseAddr(), + ScopArrayInfo::MK_Array)]; + if (SAI->getNumberOfDimensions() != 1) + continue; + unsigned DivisibleSize = SAI->getElemSizeInBytes(); + auto *Subscript = Access->getSubscript(0); + while (!isDivisible(Subscript, DivisibleSize, *SE)) + DivisibleSize /= 2; + auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8); + SAI->updateElementType(Ty); + } + for (auto &Stmt : *this) for (auto &Access : Stmt) Access->updateDimensionality(); @@ -3146,8 +3161,6 @@ return "Inbounds"; case WRAPPING: return "No-overflows"; - case ALIGNMENT: - return "Alignment"; case ERRORBLOCK: return "No-error"; case INFINITELOOP: Index: polly/trunk/test/Isl/CodeGen/invariant_cannot_handle_void.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/invariant_cannot_handle_void.ll +++ polly/trunk/test/Isl/CodeGen/invariant_cannot_handle_void.ll @@ -1,9 +1,27 @@ +; RUN: opt %loadPolly -analyze -polly-scops %s | FileCheck %s --check-prefix=SCOP ; RUN: opt %loadPolly -S -polly-codegen %s | FileCheck %s ; ; The offset of the %tmp1 load wrt. to %buff (62 bytes) is not divisible -; by the type size (i32 = 4 bytes), thus we will bail out. +; by the type size (i32 = 4 bytes), thus we will have to represent %buff +; with a smaller type. In this case i16 is sufficient to represent the +; 62 byte offset accurately. ; -; CHECK-NOT: polly.start +; SCOP: Invariant Accesses: { +; SCOP: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; SCOP: { Stmt_if_end_6[] -> MemRef_buff[o0] : 31 <= o0 <= 32 }; +; SCOP: Execution Context: { : } +; SCOP: } +; SCOP: Arrays { +; SCOP: i16 MemRef_buff[*]; // Element size 2 +; SCOP: i32 MemRef_key_0; // Element size 4 +; SCOP: } +; +; CHECK-LABEL: polly.preload.begin: +; CHECK-NEXT: %polly.access.cast.buff = bitcast i8* %buff to i16* +; CHECK-NEXT: %polly.access.buff = getelementptr i16, i16* %polly.access.cast.buff, i64 31 +; CHECK-NEXT: %polly.access.buff.cast = bitcast i16* %polly.access.buff to i32* +; CHECK-NEXT: %polly.access.buff.load = load i32, i32* %polly.access.buff.cast, align 4 +; CHECK-NEXT: store i32 %polly.access.buff.load, i32* %tmp1.preload.s2a ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/ScopInfo/multiple-types-access-offset-not-dividable-by-element-size.ll =================================================================== --- polly/trunk/test/ScopInfo/multiple-types-access-offset-not-dividable-by-element-size.ll +++ polly/trunk/test/ScopInfo/multiple-types-access-offset-not-dividable-by-element-size.ll @@ -12,12 +12,35 @@ ; } ; } ; -; Polly currently does not allow such cases (even without multiple accesses of -; different type being involved). -; TODO: Add support for such kind of accesses -; -; -; CHECK: Alignment assumption: { : 1 = 0 } +; CHECK: Arrays { +; CHECK: i8 MemRef_Short[*]; // Element size 1 +; CHECK: i8 MemRef_Float[*]; // Element size 1 +; CHECK: i8 MemRef_Double[*]; // Element size 1 +; CHECK: } +; CHECK: Arrays (Bounds as pw_affs) { +; CHECK: i8 MemRef_Short[*]; // Element size 1 +; CHECK: i8 MemRef_Float[*]; // Element size 1 +; CHECK: i8 MemRef_Double[*]; // Element size 1 +; CHECK: } +; CHECK: Statements { +; CHECK: Stmt_bb2 +; CHECK: Domain := +; CHECK: { Stmt_bb2[i0] : 0 <= i0 <= 99 }; +; CHECK: Schedule := +; CHECK: { Stmt_bb2[i0] -> [i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb2[i0] -> MemRef_Short[o0] : i0 <= o0 <= 1 + i0 }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb2[i0] -> MemRef_Short[i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb2[i0] -> MemRef_Float[o0] : i0 <= o0 <= 3 + i0 }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb2[i0] -> MemRef_Float[i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb2[i0] -> MemRef_Double[o0] : i0 <= o0 <= 7 + i0 }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb2[i0] -> MemRef_Double[i0] }; +; CHECK: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/ScopInfo/multiple-types-non-power-of-two-2.ll =================================================================== --- polly/trunk/test/ScopInfo/multiple-types-non-power-of-two-2.ll +++ polly/trunk/test/ScopInfo/multiple-types-non-power-of-two-2.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-scops -analyze \ ; RUN: -polly-allow-differing-element-types < %s | FileCheck %s ; -; void multiple_types(i128 *A) { +; void multiple_types(i8 *A) { ; for (long i = 0; i < 100; i++) { ; A[i] = *(i128 *)&A[16 * i] + ; *(i192 *)&A[24 * i];