Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -211,15 +211,17 @@ /// @param SE The scalar evolution info for the current function /// @param Build The AST build with the new schedule. /// @param ExprBuilder An expression builder to generate new access functions. + /// @param IteratorID The ID of the induction variable of the parallel loop. static void generate(PollyIRBuilder &B, ScopStmt &Stmt, VectorValueMapT &GlobalMaps, std::vector &VLTS, __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE, __isl_keep isl_ast_build *Build = nullptr, - IslExprBuilder *ExprBuilder = nullptr) { + IslExprBuilder *ExprBuilder = nullptr, + __isl_take isl_id *IteratorID = nullptr) { VectorBlockGenerator Generator(B, GlobalMaps, VLTS, Stmt, Schedule, P, LI, - SE, Build, ExprBuilder); + SE, Build, ExprBuilder, IteratorID); Generator.copyBB(); } @@ -253,12 +255,18 @@ // dimension of the innermost loop containing the statemenet. isl_map *Schedule; + /// @brief The iterator id of the vectorized loop. + isl_id *IteratorID; + VectorBlockGenerator(PollyIRBuilder &B, VectorValueMapT &GlobalMaps, std::vector &VLTS, ScopStmt &Stmt, __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE, __isl_keep isl_ast_build *Build = nullptr, - IslExprBuilder *ExprBuilder = nullptr); + IslExprBuilder *ExprBuilder = nullptr, + __isl_take isl_id *IteratorID = nullptr); + + ~VectorBlockGenerator(); int getVectorWidth(); Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -123,10 +123,21 @@ /// @return The type with which the expression should be computed. 
llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr); + /// @brief Add @p Offset to the value of @p Id when it's used. + /// + /// This can be used to adjust the vector lane if we scalarize vector accesses. + void addIdOffset(__isl_keep isl_id *Id, int Offset); + + /// @brief Reset the offset of an id. + void resetIdOffset(__isl_keep isl_id *Id); + private: PollyIRBuilder &Builder; std::map &IDToValue; + /// @brief Mapping from ids to offsets (e.g., their vector lane). + llvm::DenseMap IDToOffset; + /// @brief A SCEVExpander to translate dimension sizes to llvm values. llvm::SCEVExpander &Expander; Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -289,13 +289,17 @@ PollyIRBuilder &B, VectorValueMapT &GlobalMaps, std::vector &VLTS, ScopStmt &Stmt, __isl_keep isl_map *Schedule, Pass *P, LoopInfo &LI, ScalarEvolution &SE, - __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder) + __isl_keep isl_ast_build *Build, IslExprBuilder *ExprBuilder, + isl_id *IteratorID) : BlockGenerator(B, Stmt, P, LI, SE, Build, ExprBuilder), - GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule) { + GlobalMaps(GlobalMaps), VLTS(VLTS), Schedule(Schedule), + IteratorID(IteratorID) { assert(GlobalMaps.size() > 1 && "Only one vector lane found"); assert(Schedule && "No statement domain provided"); } +VectorBlockGenerator::~VectorBlockGenerator() { isl_id_free(IteratorID); } + Value *VectorBlockGenerator::getVectorValue(const Value *Old, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, @@ -393,6 +397,8 @@ Value *Vector = UndefValue::get(VectorType); for (int i = 0; i < VectorWidth; i++) { + if (ExprBuilder) + ExprBuilder->addIdOffset(IteratorID, i); Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); Value *ScalarLoad = @@ -400,6 +406,8 @@ Vector = Builder.CreateInsertElement( Vector, ScalarLoad, 
Builder.getInt32(i), Load->getName() + "_p_vec_"); } + if (ExprBuilder) + ExprBuilder->resetIdOffset(IteratorID); return Vector; } @@ -409,9 +417,14 @@ VectorValueMapT &ScalarMaps) { if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || !VectorType::isValidElementType(Load->getType())) { - for (int i = 0; i < getVectorWidth(); i++) + for (int i = 0; i < getVectorWidth(); i++) { + if (ExprBuilder) + ExprBuilder->addIdOffset(IteratorID, i); ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); + } + if (ExprBuilder) + ExprBuilder->resetIdOffset(IteratorID); return; } @@ -490,11 +503,15 @@ Store->setAlignment(8); } else { for (unsigned i = 0; i < ScalarMaps.size(); i++) { + if (ExprBuilder) + ExprBuilder->addIdOffset(IteratorID, i); Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); Value *NewPointer = generateLocationAccessed( Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); Builder.CreateStore(Scalar, NewPointer); } + if (ExprBuilder) + ExprBuilder->resetIdOffset(IteratorID); } } @@ -546,9 +563,14 @@ HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); - for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) + for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) { + if (ExprBuilder) + ExprBuilder->addIdOffset(IteratorID, VectorLane); BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane], VLTS[VectorLane]); + } + if (ExprBuilder) + ExprBuilder->resetIdOffset(IteratorID); if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) return; Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -236,9 +236,11 @@ Schedule = isl_union_map_intersect_domain(Schedule, Domain); isl_map *S = isl_map_from_union_map(Schedule); - createSubstitutionsVector(Expr, Stmt, VectorMap, 
VLTS, IVS, IteratorID); + createSubstitutionsVector(Expr, Stmt, VectorMap, VLTS, IVS, + isl_id_copy(IteratorID)); VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, S, P, LI, SE, - IslAstInfo::getBuild(User), &ExprBuilder); + IslAstInfo::getBuild(User), &ExprBuilder, + IteratorID); isl_map_free(S); isl_id_free(Id); Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -444,6 +444,13 @@ V = IDToValue[Id]; + const auto &IDToOffsetIt = IDToOffset.find(Id); + if (IDToOffsetIt != IDToOffset.end()) { + assert(V->getType()->isIntegerTy() && "ID offset works only for integers"); + Value *Offset = ConstantInt::get(V->getType(), IDToOffsetIt->second); + V = Builder.CreateAdd(V, Offset); + } + isl_id_free(Id); isl_ast_expr_free(Expr); @@ -490,3 +497,9 @@ llvm_unreachable("Unexpected enum value"); } + +void IslExprBuilder::addIdOffset(isl_id *Id, int Offset) { + IDToOffset[Id] = Offset; +} + +void IslExprBuilder::resetIdOffset(isl_id *Id) { IDToOffset.erase(Id); } Index: test/Isl/CodeGen/MemAccess/simple_stride2___%for.cond---%for.end.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride2___%for.cond---%for.end.jscop @@ -0,0 +1,17 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "{ Stmt_for_body[i0] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body[i0] : i0 >= 0 and i0 <= 15 }", + "name" : "Stmt_for_body", + "schedule" : "{ Stmt_for_body[i0] -> scattering[0, i0, 0] }" + } + ] +} Index: test/Isl/CodeGen/MemAccess/simple_stride___%for.cond---%for.end.jscop.unknown_stride =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride___%for.cond---%for.end.jscop.unknown_stride @@ -0,0 +1,21 @@ +{ + 
"context" : "{ : }", + "name" : "for.cond => for.end", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body[i0] -> MemRef_B[3*i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body[i0] -> MemRef_A[5*i0] }" + } + ], + "domain" : "{ Stmt_for_body[i0] : i0 >= 0 and i0 <= 15 }", + "name" : "Stmt_for_body", + "schedule" : "{ Stmt_for_body[i0] -> scattering[0, i0, 0] }" + } + ] +} Index: test/Isl/CodeGen/MemAccess/simple_stride_test.ll =================================================================== --- test/Isl/CodeGen/MemAccess/simple_stride_test.ll +++ test/Isl/CodeGen/MemAccess/simple_stride_test.ll @@ -1,6 +1,8 @@ ; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=unroll-only -S < %s | FileCheck %s --check-prefix=UNROLL +; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=unknown_stride -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s --check-prefix=UNKNOWN_STRIDE ; -; Check that we use the correct __new__ strides: +; The first test (CHECK) verifies that we use the correct __new__ strides: ; stride zero for B ; stride one for A ; @@ -13,6 +15,109 @@ ; CHECK: %[[VP:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.A to <16 x i32>* ; CHECK: store <16 x i32> %[[SV]], <16 x i32>* %[[VP]], align 8 ; +; When unrolling the code path in the backend is different. Test it separately. 
+; +; UNROLL: %polly.access.A[[O0:[0-9]*]] = getelementptr i32* %A, i64 0 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O0]] +; UNROLL: %polly.access.A[[O1:[0-9]*]] = getelementptr i32* %A, i64 1 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O1]] +; UNROLL: %polly.access.A[[O2:[0-9]*]] = getelementptr i32* %A, i64 2 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O2]] +; UNROLL: %polly.access.A[[O3:[0-9]*]] = getelementptr i32* %A, i64 3 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O3]] +; UNROLL: %polly.access.A[[O4:[0-9]*]] = getelementptr i32* %A, i64 4 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O4]] +; UNROLL: %polly.access.A[[O5:[0-9]*]] = getelementptr i32* %A, i64 5 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O5]] +; UNROLL: %polly.access.A[[O6:[0-9]*]] = getelementptr i32* %A, i64 6 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O6]] +; UNROLL: %polly.access.A[[O7:[0-9]*]] = getelementptr i32* %A, i64 7 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O7]] +; UNROLL: %polly.access.A[[O8:[0-9]*]] = getelementptr i32* %A, i64 8 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O8]] +; UNROLL: %polly.access.A[[O9:[0-9]*]] = getelementptr i32* %A, i64 9 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O9]] +; UNROLL: %polly.access.A[[O10:[0-9]*]] = getelementptr i32* %A, i64 10 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O10]] +; UNROLL: %polly.access.A[[O11:[0-9]*]] = getelementptr i32* %A, i64 11 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O11]] +; UNROLL: %polly.access.A[[O12:[0-9]*]] = getelementptr i32* %A, i64 12 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O12]] +; UNROLL: %polly.access.A[[O13:[0-9]*]] = getelementptr i32* %A, i64 13 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O13]] +; UNROLL: 
%polly.access.A[[O14:[0-9]*]] = getelementptr i32* %A, i64 14 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O14]] +; UNROLL: %polly.access.A[[O15:[0-9]*]] = getelementptr i32* %A, i64 15 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O15]] +; +; When the stride is not zero or one both the load and the store will use other code paths: +; +; UNKNOWN_STRIDE: %polly.access.B[[L1:[0-9]*]] = getelementptr i32* %B, i64 0 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L1]] +; UNKNOWN_STRIDE: %polly.access.B[[L2:[0-9]*]] = getelementptr i32* %B, i64 3 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L2]] +; UNKNOWN_STRIDE: %polly.access.B[[L3:[0-9]*]] = getelementptr i32* %B, i64 6 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L3]] +; UNKNOWN_STRIDE: %polly.access.B[[L4:[0-9]*]] = getelementptr i32* %B, i64 9 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L4]] +; UNKNOWN_STRIDE: %polly.access.B[[L5:[0-9]*]] = getelementptr i32* %B, i64 12 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L5]] +; UNKNOWN_STRIDE: %polly.access.B[[L6:[0-9]*]] = getelementptr i32* %B, i64 15 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L6]] +; UNKNOWN_STRIDE: %polly.access.B[[L7:[0-9]*]] = getelementptr i32* %B, i64 18 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L7]] +; UNKNOWN_STRIDE: %polly.access.B[[L8:[0-9]*]] = getelementptr i32* %B, i64 21 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L8]] +; UNKNOWN_STRIDE: %polly.access.B[[L9:[0-9]*]] = getelementptr i32* %B, i64 24 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L9]] +; UNKNOWN_STRIDE: %polly.access.B[[L10:[0-9]*]] = getelementptr i32* %B, i64 27 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L10]] +; UNKNOWN_STRIDE: %polly.access.B[[L11:[0-9]*]] = getelementptr i32* %B, i64 30 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L11]] +; UNKNOWN_STRIDE: %polly.access.B[[L12:[0-9]*]] = getelementptr i32* %B, i64 33 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L12]] +; UNKNOWN_STRIDE: %polly.access.B[[L13:[0-9]*]] = 
getelementptr i32* %B, i64 36 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L13]] +; UNKNOWN_STRIDE: %polly.access.B[[L14:[0-9]*]] = getelementptr i32* %B, i64 39 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L14]] +; UNKNOWN_STRIDE: %polly.access.B[[L15:[0-9]*]] = getelementptr i32* %B, i64 42 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L15]] +; UNKNOWN_STRIDE: %polly.access.B[[L16:[0-9]*]] = getelementptr i32* %B, i64 45 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L16]] +; +; UNKNOWN_STRIDE: %polly.access.A[[O0:[0-9]*]] = getelementptr i32* %A, i64 0 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O0]] +; UNKNOWN_STRIDE: %polly.access.A[[O1:[0-9]*]] = getelementptr i32* %A, i64 5 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O1]] +; UNKNOWN_STRIDE: %polly.access.A[[O2:[0-9]*]] = getelementptr i32* %A, i64 10 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O2]] +; UNKNOWN_STRIDE: %polly.access.A[[O3:[0-9]*]] = getelementptr i32* %A, i64 15 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O3]] +; UNKNOWN_STRIDE: %polly.access.A[[O4:[0-9]*]] = getelementptr i32* %A, i64 20 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O4]] +; UNKNOWN_STRIDE: %polly.access.A[[O5:[0-9]*]] = getelementptr i32* %A, i64 25 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O5]] +; UNKNOWN_STRIDE: %polly.access.A[[O6:[0-9]*]] = getelementptr i32* %A, i64 30 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O6]] +; UNKNOWN_STRIDE: %polly.access.A[[O7:[0-9]*]] = getelementptr i32* %A, i64 35 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O7]] +; UNKNOWN_STRIDE: %polly.access.A[[O8:[0-9]*]] = getelementptr i32* %A, i64 40 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O8]] +; UNKNOWN_STRIDE: %polly.access.A[[O9:[0-9]*]] = getelementptr i32* %A, i64 45 +; UNKNOWN_STRIDE: store 
i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O9]] +; UNKNOWN_STRIDE: %polly.access.A[[O10:[0-9]*]] = getelementptr i32* %A, i64 50 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O10]] +; UNKNOWN_STRIDE: %polly.access.A[[O11:[0-9]*]] = getelementptr i32* %A, i64 55 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O11]] +; UNKNOWN_STRIDE: %polly.access.A[[O12:[0-9]*]] = getelementptr i32* %A, i64 60 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O12]] +; UNKNOWN_STRIDE: %polly.access.A[[O13:[0-9]*]] = getelementptr i32* %A, i64 65 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O13]] +; UNKNOWN_STRIDE: %polly.access.A[[O14:[0-9]*]] = getelementptr i32* %A, i64 70 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O14]] +; UNKNOWN_STRIDE: %polly.access.A[[O15:[0-9]*]] = getelementptr i32* %A, i64 75 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O15]] +; ; void simple_stride(int *restrict A, int *restrict B) { ; for (int i = 0; i < 16; i++) ; A[i * 2] = B[i * 2]; Index: test/Isl/CodeGen/MemAccess/simple_stride_test_2.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride_test_2.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s +; +; CHECK: %polly.access.A[[O0:[0-9]*]] = getelementptr i32* %A, i64 0 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O0]], align 4 +; CHECK: %polly.access.A[[O1:[0-9]*]] = getelementptr i32* %A, i64 1 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O1]], align 4 +; CHECK: %polly.access.A[[O2:[0-9]*]] = getelementptr i32* %A, i64 2 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O2]], align 4 +; CHECK: %polly.access.A[[O3:[0-9]*]] = getelementptr i32* %A, i64 3 +; CHECK: store 
i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O3]], align 4 +; CHECK: %polly.access.A[[O4:[0-9]*]] = getelementptr i32* %A, i64 4 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O4]], align 4 +; CHECK: %polly.access.A[[O5:[0-9]*]] = getelementptr i32* %A, i64 5 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O5]], align 4 +; CHECK: %polly.access.A[[O6:[0-9]*]] = getelementptr i32* %A, i64 6 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O6]], align 4 +; CHECK: %polly.access.A[[O7:[0-9]*]] = getelementptr i32* %A, i64 7 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O7]], align 4 +; CHECK: %polly.access.A[[O8:[0-9]*]] = getelementptr i32* %A, i64 8 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O8]], align 4 +; CHECK: %polly.access.A[[O9:[0-9]*]] = getelementptr i32* %A, i64 9 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O9]], align 4 +; CHECK: %polly.access.A[[O10:[0-9]*]] = getelementptr i32* %A, i64 10 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O10]], align 4 +; CHECK: %polly.access.A[[O11:[0-9]*]] = getelementptr i32* %A, i64 11 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O11]], align 4 +; CHECK: %polly.access.A[[O12:[0-9]*]] = getelementptr i32* %A, i64 12 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O12]], align 4 +; CHECK: %polly.access.A[[O13:[0-9]*]] = getelementptr i32* %A, i64 13 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O13]], align 4 +; CHECK: %polly.access.A[[O14:[0-9]*]] = getelementptr i32* %A, i64 14 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O14]], align 4 +; CHECK: %polly.access.A[[O15:[0-9]*]] = getelementptr i32* %A, i64 15 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O15]], align 4 +; +; void simple_stride2(int *restrict A) { +; for (int i = 0; i < 16; i++) +; A[i * 2] = i; +; } +; +target datalayout = 
"e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @simple_stride2(i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 16 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp4 = trunc i64 %indvars.iv to i32 + %tmp5 = shl nsw i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %tmp5 + store i32 %tmp4, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}