// Patch overview: unified diff touching LoopVectorize.cpp and VPRecipeBuilder.h.
//
// 1. VPRecipeBuilder::tryToOptimizeInductionPHI drops its `const` qualifier,
//    both in the definition (LoopVectorize.cpp) and in the declaration
//    (VPRecipeBuilder.h). The new body calls setRecipe(CastI, R); presumably
//    setRecipe is a non-const member, which would explain the change -- its
//    declaration is not part of this diff, so confirm.
// 2. The function no longer returns the freshly allocated
//    VPWidenIntOrFpInductionRecipe directly. It now:
//      - picks CastI as Casts.front(), or nullptr when Casts is empty;
//      - builds the recipe R from Phi, Operands[0] and CastI;
//      - when CastI is non-null, also registers R for CastI via setRecipe;
//      - returns R.
// 3. In the hunk at -8932/+8937, the recipe IncR looked up for the phi's
//    incoming value from OrigLatch is now special-cased: if the isa<> check on
//    IncR succeeds, IncR->getVPValue(0) is added as the operand; otherwise
//    IncR->getVPSingleValue() is added, as before the patch.
//
// NOTE(review): this text is collapsed onto one physical line and template
// arguments appear to be stripped (`ArrayRef Operands`, `SmallVectorImpl &Casts`,
// `cast_or_null(...)`, `cast(...)`, `isa(IncR)`). It will neither apply as a
// patch nor compile as shown; recover the diff from its original source first.
// NOTE(review): if Casts already stores Instruction pointers, the cast_or_null
// wrapper around `Casts.empty() ? nullptr : Casts.front()` is redundant --
// verify against the return type of getCastInsts().
// NOTE(review): the header's context comment refers to `\I`, but the parameter
// shown in the declaration is named Phi; that is pre-existing context text and
// is left untouched here.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8748,7 +8748,7 @@ VPWidenIntOrFpInductionRecipe * VPRecipeBuilder::tryToOptimizeInductionPHI(PHINode *Phi, - ArrayRef Operands) const { + ArrayRef Operands) { // Check if this is an integer or fp induction. If so, build the recipe that // produces its scalar and vector values. InductionDescriptor II = Legal->getInductionVars().lookup(Phi); @@ -8757,8 +8757,13 @@ assert(II.getStartValue() == Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())); const SmallVectorImpl &Casts = II.getCastInsts(); - return new VPWidenIntOrFpInductionRecipe( - Phi, Operands[0], Casts.empty() ? nullptr : Casts.front()); + + Instruction *CastI = + cast_or_null(Casts.empty() ? nullptr : Casts.front()); + auto *R = new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], CastI); + if (CastI) + setRecipe(CastI, R); + return R; } return nullptr; @@ -8932,7 +8937,10 @@ auto *PN = cast(R->getUnderlyingValue()); VPRecipeBase *IncR = getRecipe(cast(PN->getIncomingValueForBlock(OrigLatch))); - R->addOperand(IncR->getVPSingleValue()); + if (isa(IncR)) + R->addOperand(IncR->getVPValue(0)); + else + R->addOperand(IncR->getVPSingleValue()); } } diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -75,7 +75,7 @@ /// Check if an induction recipe should be constructed for \I. If so build and /// return it. If not, return null. VPWidenIntOrFpInductionRecipe * - tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands) const; + tryToOptimizeInductionPHI(PHINode *Phi, ArrayRef Operands); /// Optimize the special case where the operand of \p I is a constant integer /// induction variable. 
// Patch overview: unified diff for llvm/test/Transforms/LoopVectorize/induction.ll.
//
// Appends one test function,
// @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n,
// i32 %step, i32* %ptr), after the existing `exit: ret void }` context.
//
// Scalar loop being tested:
//   - %for is a phi that is 0 on entry and %iv.2.conv on the back edge, so
//     each iteration sees the previous iteration's %iv.2.conv;
//   - %iv.2.conv is %iv.2 shifted left by 24 (%iv.2.ext), then shifted right
//     arithmetically by 24; the test's own comment says both become redundant
//     under the SCEV predicates generated for the vector loop;
//   - %for is stored to %ptr indexed by %iv.1;
//   - %iv.2.next = %iv.2.conv + %step, and the loop exits when %iv.1.next == %n.
//
// What the CHECK lines pin in vector.body:
//   - the recurrence phi ([[VEC_RECUR]]) takes the vector induction phi
//     ([[VEC_IV]]) as its back-edge value;
//   - the stored value is a shufflevector of [[VEC_RECUR]] and [[VEC_IV]];
//   - no shl/ashr appears in the CHECK-NEXT sequence.
//
// NOTE(review): several operands look truncated in this text -- the first
// incoming value of the [[VEC_RECUR]] phi (`[ , %vector.ph ]`), the
// shufflevector mask after the final `<2 x i32>`, and the second operand of the
// [[VEC_IV_NEXT]] add. The patch is also collapsed onto two physical lines.
// Recover the original diff before applying or running FileCheck against it.
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -933,3 +933,41 @@ exit: ret void } + +; Test case where %iv.2.ext and %iv.2.conv become redundant due to the SCEV +; predicates generated for the vector loop. They should be removed in the +; vector loop. +define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n, i32 %step, i32* %ptr) { +; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence( +; CHECK-LABEL: vector.body: +; CHECK-NEXT: [[MAIN_IV:%.+]] = phi i64 [ 0, %vector.ph ], [ [[MAIN_IV_NEXT:%.+]], %vector.body ] +; CHECK-NEXT: [[VEC_RECUR:%.+]] = phi <2 x i32> [ , %vector.ph ], [ [[VEC_IV:%.+]], %vector.body ] +; CHECK-NEXT: [[VEC_IV]] = phi <2 x i32> [ %induction, %vector.ph ], [ [[VEC_IV_NEXT:%.+]], %vector.body ] +; CHECK-NEXT: [[MAIN_IV_0:%.+]] = add i64 [[MAIN_IV]], 0 +; CHECK-NEXT: [[RECUR_SHUFFLE:%.+]] = shufflevector <2 x i32> [[VEC_RECUR]], <2 x i32> [[VEC_IV]], <2 x i32> +; CHECK-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, i32* %ptr, i64 [[MAIN_IV_0]] +; CHECK-NEXT: [[GEP1:%.+]] = getelementptr inbounds i32, i32* [[GEP0]], i32 0 +; CHECK-NEXT: [[GEP_CAST:%.+]] = bitcast i32* [[GEP1]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[RECUR_SHUFFLE]], <2 x i32>* [[GEP_CAST]], align 4 +; CHECK-NEXT: [[MAIN_IV_NEXT]] = add nuw i64 [[MAIN_IV]], 2 +; CHECK-NEXT: [[VEC_IV_NEXT]] = add <2 x i32> [[VEC_IV]], +; +entry: + br label %loop + +loop: + %for = phi i32 [ 0, %entry ], [ %iv.2.conv, %loop ] + %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ] + %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ] + %iv.2.ext = shl i32 %iv.2, 24 + %iv.2.conv = ashr exact i32 %iv.2.ext, 24 + %gep = getelementptr inbounds i32, i32* %ptr, i64 %iv.1 + store i32 %for, i32* %gep, align 4 + %iv.2.next = add nsw i32 %iv.2.conv, %step + %iv.1.next = add nuw nsw 
i64 %iv.1, 1 + %exitcond = icmp eq i64 %iv.1.next, %n + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +}