Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -97,7 +97,11 @@ /// dimensional accesses. IslExprBuilder(PollyIRBuilder &Builder, IDToValueTy &IDToValue, llvm::SCEVExpander &Expander) - : Builder(Builder), IDToValue(IDToValue), Expander(Expander) {} + : Builder(Builder), IDToValue(IDToValue), Expander(Expander), + VectorLoopId(nullptr), VectorLane(0) {} + + /// @brief Destructor to clear the vector loop id. + ~IslExprBuilder() { setVectorLoopId(nullptr); } /// @brief Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -123,6 +127,12 @@ /// @return The type with which the expression should be computed. llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr); + /// @brief Set the id of the currently vectorized loop to @p Id. + void setVectorLoopId(__isl_take isl_id *Id); + + /// @brief Set the current vector lane. + void setVectorLane(int Lane) { VectorLane = Lane; } + private: PollyIRBuilder &Builder; std::map &IDToValue; @@ -130,6 +140,12 @@ /// @brief A SCEVExpander to translate dimension sizes to llvm values. llvm::SCEVExpander &Expander; + /// @brief The id of the currently vectorized loop or nullptr. + isl_id *VectorLoopId; + + /// @brief The vector lane we currently generate code for. 
+ int VectorLane; + llvm::Value *createOp(__isl_take isl_ast_expr *Expr); llvm::Value *createOpUnary(__isl_take isl_ast_expr *Expr); llvm::Value *createOpAccess(__isl_take isl_ast_expr *Expr); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -393,6 +393,8 @@ Value *Vector = UndefValue::get(VectorType); for (int i = 0; i < VectorWidth; i++) { + if (ExprBuilder) + ExprBuilder->setVectorLane(i); Value *NewPointer = generateLocationAccessed(Load, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); Value *ScalarLoad = @@ -400,6 +402,8 @@ Vector = Builder.CreateInsertElement( Vector, ScalarLoad, Builder.getInt32(i), Load->getName() + "_p_vec_"); } + if (ExprBuilder) + ExprBuilder->setVectorLane(0); return Vector; } @@ -409,9 +413,14 @@ VectorValueMapT &ScalarMaps) { if (PollyVectorizerChoice >= VECTORIZER_FIRST_NEED_GROUPED_UNROLL || !VectorType::isValidElementType(Load->getType())) { - for (int i = 0; i < getVectorWidth(); i++) + for (int i = 0; i < getVectorWidth(); i++) { + if (ExprBuilder) + ExprBuilder->setVectorLane(i); ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i], GlobalMaps[i], VLTS[i]); + } + if (ExprBuilder) + ExprBuilder->setVectorLane(0); return; } @@ -490,11 +499,15 @@ Store->setAlignment(8); } else { for (unsigned i = 0; i < ScalarMaps.size(); i++) { + if (ExprBuilder) + ExprBuilder->setVectorLane(i); Value *Scalar = Builder.CreateExtractElement(Vector, Builder.getInt32(i)); Value *NewPointer = generateLocationAccessed( Store, Pointer, ScalarMaps[i], GlobalMaps[i], VLTS[i]); Builder.CreateStore(Scalar, NewPointer); } + if (ExprBuilder) + ExprBuilder->setVectorLane(0); } } @@ -546,9 +559,14 @@ HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps); - for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) + for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++) { + if 
(ExprBuilder) + ExprBuilder->setVectorLane(VectorLane); BlockGenerator::copyInstruction(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane], VLTS[VectorLane]); + } + if (ExprBuilder) + ExprBuilder->setVectorLane(0); if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand) return; Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -236,9 +236,11 @@ Schedule = isl_union_map_intersect_domain(Schedule, Domain); isl_map *S = isl_map_from_union_map(Schedule); + ExprBuilder.setVectorLoopId(isl_id_copy(IteratorID)); createSubstitutionsVector(Expr, Stmt, VectorMap, VLTS, IVS, IteratorID); VectorBlockGenerator::generate(Builder, *Stmt, VectorMap, VLTS, S, P, LI, SE, IslAstInfo::getBuild(User), &ExprBuilder); + ExprBuilder.setVectorLoopId(nullptr); isl_map_free(S); isl_id_free(Id); Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -444,6 +444,12 @@ V = IDToValue[Id]; + if (VectorLane && Id == VectorLoopId) { + assert(V->getType()->isIntegerTy() && "ID offset works only for integers"); + Value *Offset = ConstantInt::get(V->getType(), VectorLane); + V = Builder.CreateAdd(V, Offset); + } + isl_id_free(Id); isl_ast_expr_free(Expr); @@ -490,3 +496,8 @@ llvm_unreachable("Unexpected enum value"); } + +void IslExprBuilder::setVectorLoopId(isl_id *Id) { + isl_id_free(VectorLoopId); + VectorLoopId = Id; +} Index: test/Isl/CodeGen/MemAccess/simple_stride2___%for.cond---%for.end.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride2___%for.cond---%for.end.jscop @@ -0,0 +1,17 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "{ 
Stmt_for_body[i0] -> MemRef_A[i0] }" + } + ], + "domain" : "{ Stmt_for_body[i0] : i0 >= 0 and i0 <= 15 }", + "name" : "Stmt_for_body", + "schedule" : "{ Stmt_for_body[i0] -> scattering[0, i0, 0] }" + } + ] +} Index: test/Isl/CodeGen/MemAccess/simple_stride___%for.cond---%for.end.jscop.unknown_stride =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride___%for.cond---%for.end.jscop.unknown_stride @@ -0,0 +1,21 @@ +{ + "context" : "{ : }", + "name" : "for.cond => for.end", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body[i0] -> MemRef_B[3*i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body[i0] -> MemRef_A[5*i0] }" + } + ], + "domain" : "{ Stmt_for_body[i0] : i0 >= 0 and i0 <= 15 }", + "name" : "Stmt_for_body", + "schedule" : "{ Stmt_for_body[i0] -> scattering[0, i0, 0] }" + } + ] +} Index: test/Isl/CodeGen/MemAccess/simple_stride_test.ll =================================================================== --- test/Isl/CodeGen/MemAccess/simple_stride_test.ll +++ test/Isl/CodeGen/MemAccess/simple_stride_test.ll @@ -1,6 +1,8 @@ ; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s +; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=unroll-only -S < %s | FileCheck %s --check-prefix=UNROLL +; RUN: opt %loadPolly -basicaa -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=unknown_stride -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s --check-prefix=UNKNOWN_STRIDE ; -; Check that we use the correct __new__ strides: +; The first test (CHECK) verifies that we use the correct __new__ strides: ; stride zero for B ; stride one for A ; @@ -13,6 +15,109 @@ ; CHECK: %[[VP:[._a-zA-Z0-9]*]] = bitcast i32* %polly.access.A to <16 x i32>* ; CHECK: store <16 x 
i32> %[[SV]], <16 x i32>* %[[VP]], align 8 ; +; When unrolling, the code path in the backend is different. Test it separately. +; +; UNROLL: %polly.access.A[[O0:[0-9]*]] = getelementptr i32* %A, i64 0 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O0]] +; UNROLL: %polly.access.A[[O1:[0-9]*]] = getelementptr i32* %A, i64 1 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O1]] +; UNROLL: %polly.access.A[[O2:[0-9]*]] = getelementptr i32* %A, i64 2 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O2]] +; UNROLL: %polly.access.A[[O3:[0-9]*]] = getelementptr i32* %A, i64 3 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O3]] +; UNROLL: %polly.access.A[[O4:[0-9]*]] = getelementptr i32* %A, i64 4 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O4]] +; UNROLL: %polly.access.A[[O5:[0-9]*]] = getelementptr i32* %A, i64 5 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O5]] +; UNROLL: %polly.access.A[[O6:[0-9]*]] = getelementptr i32* %A, i64 6 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O6]] +; UNROLL: %polly.access.A[[O7:[0-9]*]] = getelementptr i32* %A, i64 7 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O7]] +; UNROLL: %polly.access.A[[O8:[0-9]*]] = getelementptr i32* %A, i64 8 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O8]] +; UNROLL: %polly.access.A[[O9:[0-9]*]] = getelementptr i32* %A, i64 9 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O9]] +; UNROLL: %polly.access.A[[O10:[0-9]*]] = getelementptr i32* %A, i64 10 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O10]] +; UNROLL: %polly.access.A[[O11:[0-9]*]] = getelementptr i32* %A, i64 11 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O11]] +; UNROLL: %polly.access.A[[O12:[0-9]*]] = getelementptr i32* %A, i64 12 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O12]] +; UNROLL: %polly.access.A[[O13:[0-9]*]] 
= getelementptr i32* %A, i64 13 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O13]] +; UNROLL: %polly.access.A[[O14:[0-9]*]] = getelementptr i32* %A, i64 14 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O14]] +; UNROLL: %polly.access.A[[O15:[0-9]*]] = getelementptr i32* %A, i64 15 +; UNROLL: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O15]] +; +; When the stride is neither zero nor one, both the load and the store use other code paths: +; +; UNKNOWN_STRIDE: %polly.access.B[[L1:[0-9]*]] = getelementptr i32* %B, i64 0 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L1]] +; UNKNOWN_STRIDE: %polly.access.B[[L2:[0-9]*]] = getelementptr i32* %B, i64 3 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L2]] +; UNKNOWN_STRIDE: %polly.access.B[[L3:[0-9]*]] = getelementptr i32* %B, i64 6 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L3]] +; UNKNOWN_STRIDE: %polly.access.B[[L4:[0-9]*]] = getelementptr i32* %B, i64 9 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L4]] +; UNKNOWN_STRIDE: %polly.access.B[[L5:[0-9]*]] = getelementptr i32* %B, i64 12 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L5]] +; UNKNOWN_STRIDE: %polly.access.B[[L6:[0-9]*]] = getelementptr i32* %B, i64 15 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L6]] +; UNKNOWN_STRIDE: %polly.access.B[[L7:[0-9]*]] = getelementptr i32* %B, i64 18 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L7]] +; UNKNOWN_STRIDE: %polly.access.B[[L8:[0-9]*]] = getelementptr i32* %B, i64 21 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L8]] +; UNKNOWN_STRIDE: %polly.access.B[[L9:[0-9]*]] = getelementptr i32* %B, i64 24 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L9]] +; UNKNOWN_STRIDE: %polly.access.B[[L10:[0-9]*]] = getelementptr i32* %B, i64 27 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L10]] +; UNKNOWN_STRIDE: %polly.access.B[[L11:[0-9]*]] = getelementptr i32* %B, i64 30 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L11]] +; UNKNOWN_STRIDE: %polly.access.B[[L12:[0-9]*]] = getelementptr i32* 
%B, i64 33 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L12]] +; UNKNOWN_STRIDE: %polly.access.B[[L13:[0-9]*]] = getelementptr i32* %B, i64 36 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L13]] +; UNKNOWN_STRIDE: %polly.access.B[[L14:[0-9]*]] = getelementptr i32* %B, i64 39 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L14]] +; UNKNOWN_STRIDE: %polly.access.B[[L15:[0-9]*]] = getelementptr i32* %B, i64 42 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L15]] +; UNKNOWN_STRIDE: %polly.access.B[[L16:[0-9]*]] = getelementptr i32* %B, i64 45 +; UNKNOWN_STRIDE: load i32* %polly.access.B[[L16]] +; +; UNKNOWN_STRIDE: %polly.access.A[[O0:[0-9]*]] = getelementptr i32* %A, i64 0 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O0]] +; UNKNOWN_STRIDE: %polly.access.A[[O1:[0-9]*]] = getelementptr i32* %A, i64 5 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O1]] +; UNKNOWN_STRIDE: %polly.access.A[[O2:[0-9]*]] = getelementptr i32* %A, i64 10 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O2]] +; UNKNOWN_STRIDE: %polly.access.A[[O3:[0-9]*]] = getelementptr i32* %A, i64 15 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O3]] +; UNKNOWN_STRIDE: %polly.access.A[[O4:[0-9]*]] = getelementptr i32* %A, i64 20 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O4]] +; UNKNOWN_STRIDE: %polly.access.A[[O5:[0-9]*]] = getelementptr i32* %A, i64 25 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O5]] +; UNKNOWN_STRIDE: %polly.access.A[[O6:[0-9]*]] = getelementptr i32* %A, i64 30 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O6]] +; UNKNOWN_STRIDE: %polly.access.A[[O7:[0-9]*]] = getelementptr i32* %A, i64 35 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O7]] +; UNKNOWN_STRIDE: %polly.access.A[[O8:[0-9]*]] = getelementptr i32* %A, i64 40 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* 
%polly.access.A[[O8]] +; UNKNOWN_STRIDE: %polly.access.A[[O9:[0-9]*]] = getelementptr i32* %A, i64 45 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O9]] +; UNKNOWN_STRIDE: %polly.access.A[[O10:[0-9]*]] = getelementptr i32* %A, i64 50 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O10]] +; UNKNOWN_STRIDE: %polly.access.A[[O11:[0-9]*]] = getelementptr i32* %A, i64 55 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O11]] +; UNKNOWN_STRIDE: %polly.access.A[[O12:[0-9]*]] = getelementptr i32* %A, i64 60 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O12]] +; UNKNOWN_STRIDE: %polly.access.A[[O13:[0-9]*]] = getelementptr i32* %A, i64 65 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O13]] +; UNKNOWN_STRIDE: %polly.access.A[[O14:[0-9]*]] = getelementptr i32* %A, i64 70 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O14]] +; UNKNOWN_STRIDE: %polly.access.A[[O15:[0-9]*]] = getelementptr i32* %A, i64 75 +; UNKNOWN_STRIDE: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O15]] +; ; void simple_stride(int *restrict A, int *restrict B) { ; for (int i = 0; i < 16; i++) ; A[i * 2] = B[i * 2]; Index: test/Isl/CodeGen/MemAccess/simple_stride_test_2.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/simple_stride_test_2.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-codegen-isl -polly-vectorizer=polly -S < %s | FileCheck %s +; +; CHECK: %polly.access.A[[O0:[0-9]*]] = getelementptr i32* %A, i64 0 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O0]], align 4 +; CHECK: %polly.access.A[[O1:[0-9]*]] = getelementptr i32* %A, i64 1 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O1]], align 4 +; CHECK: %polly.access.A[[O2:[0-9]*]] = getelementptr i32* %A, i64 2 +; CHECK: store i32 
%{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O2]], align 4 +; CHECK: %polly.access.A[[O3:[0-9]*]] = getelementptr i32* %A, i64 3 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O3]], align 4 +; CHECK: %polly.access.A[[O4:[0-9]*]] = getelementptr i32* %A, i64 4 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O4]], align 4 +; CHECK: %polly.access.A[[O5:[0-9]*]] = getelementptr i32* %A, i64 5 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O5]], align 4 +; CHECK: %polly.access.A[[O6:[0-9]*]] = getelementptr i32* %A, i64 6 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O6]], align 4 +; CHECK: %polly.access.A[[O7:[0-9]*]] = getelementptr i32* %A, i64 7 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O7]], align 4 +; CHECK: %polly.access.A[[O8:[0-9]*]] = getelementptr i32* %A, i64 8 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O8]], align 4 +; CHECK: %polly.access.A[[O9:[0-9]*]] = getelementptr i32* %A, i64 9 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O9]], align 4 +; CHECK: %polly.access.A[[O10:[0-9]*]] = getelementptr i32* %A, i64 10 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O10]], align 4 +; CHECK: %polly.access.A[[O11:[0-9]*]] = getelementptr i32* %A, i64 11 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O11]], align 4 +; CHECK: %polly.access.A[[O12:[0-9]*]] = getelementptr i32* %A, i64 12 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O12]], align 4 +; CHECK: %polly.access.A[[O13:[0-9]*]] = getelementptr i32* %A, i64 13 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O13]], align 4 +; CHECK: %polly.access.A[[O14:[0-9]*]] = getelementptr i32* %A, i64 14 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O14]], align 4 +; CHECK: %polly.access.A[[O15:[0-9]*]] = getelementptr i32* %A, i64 15 +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %polly.access.A[[O15]], align 4 +; +; void 
simple_stride2(int *restrict A) { +; for (int i = 0; i < 16; i++) +; A[i * 2] = i; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @simple_stride2(i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 16 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp4 = trunc i64 %indvars.iv to i32 + %tmp5 = shl nsw i64 %indvars.iv, 1 + %arrayidx3 = getelementptr inbounds i32* %A, i64 %tmp5 + store i32 %tmp4, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}