Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1910,14 +1910,23 @@ // factor. The last of those goes into the PHI. PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind", &*LoopVectorBody->getFirstInsertionPt()); - Value *LastInduction = VecInd; + Instruction *LastInduction = VecInd; for (unsigned Part = 0; Part < UF; ++Part) { Entry[Part] = LastInduction; - LastInduction = Builder.CreateAdd(LastInduction, SplatVF, "step.add"); + LastInduction = cast<Instruction>( + Builder.CreateAdd(LastInduction, SplatVF, "step.add")); } + // Move the last step to the end of the latch block. This ensures consistent + // placement of all induction updates. + auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch(); + auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator()); + auto *ICmp = cast<Instruction>(Br->getCondition()); + LastInduction->moveBefore(ICmp); + LastInduction->setName("vec.ind.next"); + VecInd->addIncoming(SteppedStart, LoopVectorPreHeader); - VecInd->addIncoming(LastInduction, LoopVectorBody); + VecInd->addIncoming(LastInduction, LoopVectorLatch); } void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry, Index: llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -19,8 +19,6 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x 
i64> , [[VEC_IND]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP11]] @@ -137,6 +135,8 @@ ; CHECK-NEXT: [[TMP123:%.*]] = insertelement <16 x i32*> [[TMP119]], i32* [[TMP122]], i32 15 ; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> ) +; CHECK: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], +; CHECK: [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], entry: %0 = load i32, i32* @c, align 4 %cmp34 = icmp sgt i32 %0, 8 Index: llvm/trunk/test/Transforms/LoopVectorize/induction.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/induction.ll +++ llvm/trunk/test/Transforms/LoopVectorize/induction.ll @@ -437,18 +437,18 @@ ; IND-LABEL: veciv ; IND: vector.body: ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; IND: %vec.ind = phi <2 x i32> [ , %vector.ph ], [ %step.add, %vector.body ] -; IND: %step.add = add <2 x i32> %vec.ind, +; IND: %vec.ind = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] ; IND: %index.next = add i32 %index, 2 +; IND: %vec.ind.next = add <2 x i32> %vec.ind, ; IND: %[[CMP:.*]] = icmp eq i32 %index.next ; IND: br i1 %[[CMP]] ; UNROLL-LABEL: veciv ; UNROLL: vector.body: ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; UNROLL: %vec.ind = phi <2 x i32> [ , %vector.ph ], [ %step.add1, %vector.body ] +; UNROLL: %vec.ind = phi <2 x i32> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] ; UNROLL: %step.add = add <2 x i32> %vec.ind, -; UNROLL: %step.add1 = add <2 x i32> %vec.ind, ; UNROLL: %index.next = add i32 %index, 4 +; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, ; UNROLL: %[[CMP:.*]] = icmp eq i32 
%index.next ; UNROLL: br i1 %[[CMP]] define void @veciv(i32* nocapture %a, i32 %start, i32 %k) { @@ -471,8 +471,8 @@ ; IND: vector.body: ; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] ; IND: %[[VECIND:.*]] = phi <2 x i32> [ , %vector.ph ], [ %[[STEPADD:.*]], %vector.body ] -; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], ; IND: %index.next = add i64 %index, 2 +; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], ; IND: %[[CMP:.*]] = icmp eq i64 %index.next ; IND: br i1 %[[CMP]] define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) { @@ -499,9 +499,9 @@ ; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], ; IND-LABEL: vector.body: ; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %step.add, %vector.body ] -; IND: %step.add = add <2 x i32> %vec.ind, +; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ] ; IND: %index.next = add i32 %index, 2 +; IND: %vec.ind.next = add <2 x i32> %vec.ind, ; IND: %[[CMP:.*]] = icmp eq i32 %index.next ; IND: br i1 %[[CMP]] ; UNROLL-LABEL: nonprimary @@ -511,10 +511,10 @@ ; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], ; UNROLL-LABEL: vector.body: ; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] -; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %step.add1, %vector.body ] +; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %vec.ind.next, %vector.body ] ; UNROLL: %step.add = add <2 x i32> %vec.ind, -; UNROLL: %step.add1 = add <2 x i32> %vec.ind, ; UNROLL: %index.next = add i32 %index, 4 +; UNROLL: %vec.ind.next = add <2 x i32> %vec.ind, ; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next ; UNROLL: br i1 %[[CMP]] define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) { Index: llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll =================================================================== --- 
llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll +++ llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll @@ -7,10 +7,11 @@ ;CHECK-LABEL: @array_at_plus_one( ;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] -;CHECK: %vec.ind = phi <4 x i64> [ , %vector.ph ], [ %step.add, %vector.body ] -;CHECK: %vec.ind1 = phi <4 x i32> [ , %vector.ph ], [ %step.add2, %vector.body ] -;CHECK: add <4 x i64> %vec.ind, +;CHECK: %vec.ind = phi <4 x i64> [ , %vector.ph ], [ %vec.ind.next, %vector.body ] +;CHECK: %vec.ind1 = phi <4 x i32> [ , %vector.ph ], [ %vec.ind.next2, %vector.body ] ;CHECK: add nsw <4 x i64> %vec.ind, +;CHECK: %vec.ind.next = add <4 x i64> %vec.ind, +;CHECK: %vec.ind.next2 = add <4 x i32> %vec.ind1, ;CHECK: ret i32 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0