Index: include/polly/CodeGen/CodeGeneration.h =================================================================== --- include/polly/CodeGen/CodeGeneration.h +++ include/polly/CodeGen/CodeGeneration.h @@ -39,6 +39,7 @@ /// @brief Flag to turn on/off annotation of alias scopes. extern bool PollyAnnotateAliasScopes; +/// FIXME: Depraced and needs to be removed with the cloog backend. static inline int getNumberOfIterations(__isl_take isl_set *Domain) { int Dim = isl_set_dim(Domain, isl_dim_set); Index: include/polly/CodeGen/IslAst.h =================================================================== --- include/polly/CodeGen/IslAst.h +++ include/polly/CodeGen/IslAst.h @@ -139,6 +139,12 @@ /// @brief Get the nodes build context or a nullptr if not available. static __isl_give isl_ast_build *getBuild(__isl_keep isl_ast_node *Node); + /// @brief Get the number of iterations or a nullptr if not available. + static __isl_give isl_pw_aff * + getNumberOfIterations(__isl_keep isl_ast_node *For); + + /// @brief Get the number of iterations as int or -1 if not available. + static int getNumberOfIterationsAsInt(__isl_keep isl_ast_node *For); ///} virtual void getAnalysisUsage(AnalysisUsage &AU) const; Index: include/polly/Support/GICHelper.h =================================================================== --- include/polly/Support/GICHelper.h +++ include/polly/Support/GICHelper.h @@ -75,6 +75,32 @@ std::string getIslCompatibleName(std::string Prefix, const llvm::Value *Val, std::string Suffix); +/// @brief Return @p Aff incremented by @p i +__isl_give isl_pw_aff *incrementPwAff(__isl_take isl_pw_aff *Aff, int i); + +/// @brief Return the only affine piece of @p PA. +__isl_give isl_aff *isl_aff_from_pw_aff(__isl_take isl_pw_aff *PA); + +/// @brief Return an overestimation of the number of elements in @p S. +/// +/// @param S The set of which we want to now a bound on the elements. +/// @param Dim The number of input dimensions we want to keep. +/// @param LexMinPtr If not null the lexmin will be returned in here. +/// @param LexMaxPtr If not null the lexmax will be returned in here. +/// +/// @returns A piecewise affine function over approximating the number of +/// elements in @p S as a function of the first @p Dim dimensions +/// of @p S. Furthermore the lexicographic minimum/maximum of @p S +/// in @p LexMinPtr and @p LexMaxPtr if they are given. +__isl_give isl_pw_multi_aff * +getNumElementsUB(__isl_take isl_set *S, int Dim, + __isl_give isl_map **LexMinPtr = nullptr, + __isl_give isl_map **LexMaxPtr = nullptr); + +/// @brief Return the number of iterations for the outermost dim of @p Schedule. +__isl_give isl_pw_aff * +getNumberOfIterationsForSchedule(__isl_take isl_union_map *Schedule); + } // end namespace polly #endif Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -618,7 +618,9 @@ isl_set *MemoryAccess::getStride(__isl_take const isl_map *Schedule) const { isl_map *S = const_cast(Schedule); - isl_map *AccessRelation = getAccessRelation(); + isl_map *AccessRelation = getNewAccessRelation(); + if (!AccessRelation) + AccessRelation = getAccessRelation(); isl_space *Space = isl_space_range(isl_map_get_space(S)); isl_map *NextScatt = getEqualAndLarger(Space); Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -25,6 +25,7 @@ #include "polly/LinkAllPasses.h" #include "polly/Options.h" #include "polly/ScopInfo.h" +#include "polly/Support/GICHelper.h" #include "llvm/Support/Debug.h" #include "isl/union_map.h" @@ -466,6 +467,31 @@ return Payload ? Payload->Build : nullptr; } +isl_pw_aff *IslAstInfo::getNumberOfIterations(isl_ast_node *For) { + isl_union_map *Schedule = IslAstInfo::getSchedule(For); + return Schedule ? getNumberOfIterationsForSchedule(Schedule) : nullptr; +} + +int IslAstInfo::getNumberOfIterationsAsInt(isl_ast_node *For) { + isl_pw_aff *NumItPAff = getNumberOfIterations(For); + if (!NumItPAff || !isl_pw_aff_is_cst(NumItPAff) || + isl_pw_aff_n_piece(NumItPAff) != 1) { + isl_pw_aff_free(NumItPAff); + return -1; + } + + isl_aff *NumItAff = isl_aff_from_pw_aff(NumItPAff); + assert(isl_aff_is_cst(NumItAff) && "Assumed affine function to be constant"); + + isl_val *NumItVal = isl_aff_get_constant_val(NumItAff); + isl_aff_free(NumItAff); + + int NumItInt = isl_val_get_num_si(NumItVal); + isl_val_free(NumItVal); + + return NumItInt; +} + void IslAstInfo::printScop(raw_ostream &OS) const { isl_ast_print_options *Options; isl_ast_node *RootNode = getAst(); Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -110,8 +110,6 @@ __isl_give isl_ast_expr *getUpperBound(__isl_keep isl_ast_node *For, CmpInst::Predicate &Predicate); - unsigned getNumberOfIterations(__isl_keep isl_ast_node *For); - void createFor(__isl_take isl_ast_node *For); void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); @@ -222,15 +220,6 @@ return UB; } -unsigned IslNodeBuilder::getNumberOfIterations(__isl_keep isl_ast_node *For) { - isl_union_map *Schedule = IslAstInfo::getSchedule(For); - isl_set *LoopDomain = isl_set_from_union_set(isl_union_map_range(Schedule)); - int NumberOfIterations = polly::getNumberOfIterations(LoopDomain); - if (NumberOfIterations == -1) - return -1; - return NumberOfIterations + 1; -} - void IslNodeBuilder::createUserVector(__isl_take isl_ast_node *User, std::vector &IVS, __isl_take isl_id *IteratorID, @@ -386,7 +375,7 @@ if (Vector && IslAstInfo::isInnermostParallel(For) && !IslAstInfo::isReductionParallel(For)) { - int VectorWidth = getNumberOfIterations(For); + int VectorWidth = IslAstInfo::getNumberOfIterationsAsInt(For); if (1 < VectorWidth && VectorWidth <= 16) { createForVector(For, VectorWidth); return; Index: lib/Exchange/JSONExporter.cpp =================================================================== --- lib/Exchange/JSONExporter.cpp +++ lib/Exchange/JSONExporter.cpp @@ -185,6 +185,7 @@ S = &scop; Region &R = S->getRegion(); Dependences *D = &getAnalysis(); + const DataLayout &DL = getAnalysis().getDataLayout(); std::string FileName = ImportDir + "/" + getFileName(S); @@ -281,18 +282,33 @@ newAccessMap = isl_map_set_tuple_id(newAccessMap, isl_dim_out, OutId); // We keep the old alignment, thus we cannot allow accesses to memory - // locations that were not accessed before. - isl_set *newAccessSet = isl_map_range(isl_map_copy(newAccessMap)); - isl_set *currentAccessSet = isl_map_range(isl_map_copy(currentAccessMap)); - bool isSubset = isl_set_is_subset(newAccessSet, currentAccessSet); - isl_set_free(newAccessSet); - isl_set_free(currentAccessSet); - - if (!isSubset) { - errs() << "JScop file changes the accessed memory\n"; - isl_map_free(currentAccessMap); - isl_map_free(newAccessMap); - return false; + // locations that were not accessed before if the alignment of the access + // is not the default alignment. + bool SpecialAlignment = true; + if (LoadInst *LoadI = dyn_cast(MA->getAccessInstruction())) { + SpecialAlignment = + DL.getABITypeAlignment(LoadI->getType()) != LoadI->getAlignment(); + } else if (StoreInst *StoreI = + dyn_cast(MA->getAccessInstruction())) { + SpecialAlignment = + DL.getABITypeAlignment(StoreI->getValueOperand()->getType()) != + StoreI->getAlignment(); + } + + if (SpecialAlignment) { + isl_set *newAccessSet = isl_map_range(isl_map_copy(newAccessMap)); + isl_set *currentAccessSet = + isl_map_range(isl_map_copy(currentAccessMap)); + bool isSubset = isl_set_is_subset(newAccessSet, currentAccessSet); + isl_set_free(newAccessSet); + isl_set_free(currentAccessSet); + + if (!isSubset) { + errs() << "JScop file changes the accessed memory\n"; + isl_map_free(currentAccessMap); + isl_map_free(newAccessMap); + return false; + } } // We need to copy the isl_ids for the parameter dimensions to the new @@ -342,6 +358,7 @@ void JSONImporter::getAnalysisUsage(AnalysisUsage &AU) const { ScopPass::getAnalysisUsage(AU); AU.addRequired(); + AU.addRequired(); } Pass *polly::createJSONImporterPass() { return new JSONImporter(); } @@ -360,6 +377,7 @@ " (Reads a .jscop file for each Scop)", false, false); INITIALIZE_PASS_DEPENDENCY(Dependences) +INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) INITIALIZE_PASS_END(JSONImporter, "polly-import-jscop", "Polly - Import Scops from JSON" " (Reads a .jscop file for each Scop)", Index: lib/Support/GICHelper.cpp =================================================================== --- lib/Support/GICHelper.cpp +++ lib/Support/GICHelper.cpp @@ -11,7 +11,9 @@ // //===----------------------------------------------------------------------===// #include "polly/Support/GICHelper.h" + #include "llvm/IR/Value.h" + #include "isl/aff.h" #include "isl/map.h" #include "isl/schedule.h" @@ -152,3 +154,70 @@ makeIslCompatible(ValStr); return ValStr; } + +isl_pw_aff *polly::incrementPwAff(isl_pw_aff *Aff, int i) { + isl_aff *Zero = isl_aff_zero_on_domain( + isl_local_space_from_space(isl_pw_aff_get_domain_space(Aff))); + isl_aff *Inc = isl_aff_add_constant_si(Zero, i); + return isl_pw_aff_add(Aff, isl_pw_aff_from_aff(Inc)); +} + +isl_pw_multi_aff *polly::getNumElementsUB(isl_set *S, int Dim, + isl_map **LexMinPtr, + isl_map **LexMaxPtr) { + S = isl_set_reset_tuple_id(S); + + isl_map *Map = isl_map_from_domain_and_range(isl_set_copy(S), S); + for (int i = 0; i < Dim; i++) + Map = isl_map_equate(Map, isl_dim_in, i, isl_dim_out, i); + + isl_map *LexMax = isl_map_lexmax(isl_map_copy(Map)); + if (LexMaxPtr) + *LexMaxPtr = isl_map_copy(LexMax); + + isl_map *LexMin = isl_map_lexmin(Map); + if (LexMinPtr) + *LexMinPtr = isl_map_copy(LexMin); + + isl_map *Sub = isl_map_sum(LexMax, isl_map_neg(LexMin)); + isl_pw_multi_aff *Elements = isl_pw_multi_aff_from_map(Sub); + + // Elements is now off-by-one (to low) in each dimension. + for (unsigned u = 0, e = isl_pw_multi_aff_dim(Elements, isl_dim_out); u < e; + u++) { + isl_pw_aff *ElementsDim = isl_pw_multi_aff_get_pw_aff(Elements, u); + ElementsDim = incrementPwAff(ElementsDim, 1); + Elements = isl_pw_multi_aff_set_pw_aff(Elements, u, ElementsDim); + } + + return isl_pw_multi_aff_coalesce(Elements); +} + +static int extractAffineFromPiecewiseAffine(__isl_keep isl_set *Dom, + __isl_keep isl_aff *Aff, + void *User) { + isl_set_free(Dom); + *(isl_aff **)User = Aff; + return 0; +} + +isl_aff *polly::isl_aff_from_pw_aff(isl_pw_aff *PA) { + assert(PA && isl_pw_aff_n_piece(PA) == 1 && + "PA is null or has not exactly one piece"); + isl_aff *A = nullptr; + isl_pw_aff_foreach_piece(PA, extractAffineFromPiecewiseAffine, &A); + isl_pw_aff_free(PA); + assert(A && "Could not extract affine piece"); + return A; +} + +isl_pw_aff *polly::getNumberOfIterationsForSchedule(isl_union_map *Schedule) { + isl_union_set *Range = isl_union_map_range(Schedule); + isl_set *LoopDomain = isl_set_from_union_set(Range); + int InDim = isl_set_n_dim(LoopDomain) - 1; + isl_pw_multi_aff *NumElements = getNumElementsUB(LoopDomain, InDim); + unsigned Dim = isl_pw_multi_aff_dim(NumElements, isl_dim_set) - 1; + isl_pw_aff *NumIterations = isl_pw_multi_aff_get_pw_aff(NumElements, Dim); + isl_pw_multi_aff_free(NumElements); + return NumIterations; +} Index: test/Isl/CodeGen/MemAccess/default_aligned_new_access_function.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/default_aligned_new_access_function.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -analyze < %s | FileCheck %s +; +; Check that we allow the new access functions even though they access +; different locations than the original ones (but the alignment is the +; default, thus there is no problem). +; +; CHECK-DAG: New access function '{ Stmt_for_body[i0] -> MemRef_B[0] }'detected in JSCOP file +; CHECK-DAG: New access function '{ Stmt_for_body[i0] -> MemRef_A[i0] }'detected in JSCOP file +; +; void simple_stride(int *restrict A, int *restrict B) { +; for (int i = 0; i < 16; i++) +; A[i * 2] = B[i * 2]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @simple_stride(i32* noalias %A, i32* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 16 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp = shl nsw i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds i32* %B, i64 %tmp + %tmp4 = load i32* %arrayidx, align 4 + %tmp5 = shl nsw i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %tmp5 + store i32 %tmp4, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/MemAccess/simple_stride___%for.cond---%for.end.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride___%for.cond---%for.end.jscop @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body[i0] -> MemRef_B[0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body[i0] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body[i0] : i0 >= 0 and i0 <= 15 }", + "name" : "Stmt_for_body", + "schedule" : "{ Stmt_for_body[i0] -> scattering[0, i0, 0] }" + } + ] +} Index: test/Isl/CodeGen/MemAccess/simple_stride_test.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride_test.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s +; +; Check that we use the correct __new__ strides: +; stride zero for B +; stride one for A +; +; CHECK: %polly.access.B = getelementptr i32* %B, i64 0 +; CHECK: %[[BC:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.B to <1 x i32>* +; CHECK: %[[LD:[._a-zA-Z0-9]*]] = load <1 x i32>* %[[BC]], align 8 +; CHECK: %[[SV:[._a-zA-Z0-9]*]] = shufflevector <1 x i32> %[[LD]], <1 x i32> %[[LD]], <16 x i32> zeroinitializer +; +; CHECK: %polly.access.A = getelementptr i32* %A, i64 0 +; CHECK: %[[VP:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.A to <16 x i32>* +; CHECK: store <16 x i32> %[[SV]], <16 x i32>* %[[VP]], align 8 +; +; void simple_stride(int *restrict A, int *restrict B) { +; for (int i = 0; i < 16; i++) +; A[i * 2] = B[i * 2]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @simple_stride(i32* noalias %A, i32* noalias %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 16 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp = shl nsw i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds i32* %B, i64 %tmp + %tmp4 = load i32* %arrayidx, align 4 + %tmp5 = shl nsw i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %tmp5 + store i32 %tmp4, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}