diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8761,8 +8761,10 @@
     Instruction *CastI = cast_or_null<Instruction>(
         Casts.empty() ? nullptr : Casts.front());
     auto *R = new VPWidenIntOrFpInductionRecipe(Phi, Operands[0], CastI);
-    if (CastI)
+    if (CastI) {
+      recordRecipeOf(CastI);
       setRecipe(CastI, R);
+    }
     return R;
   }
 
@@ -9090,8 +9092,10 @@
 
       // Record the incoming value from the backedge, so we can add the incoming
       // value from the backedge after all recipes have been created.
-      recordRecipeOf(cast<Instruction>(
-          Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
+      auto *IncI = cast<Instruction>(
+          Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+      if (!recipeAlreadyRecorded(IncI))
+        recordRecipeOf(IncI);
       PhisToFix.push_back(PhiRecipe);
     } else {
       // TODO: record start and backedge value for remaining pointer induction
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -147,6 +147,11 @@
     Ingredient2Recipe[I] = nullptr;
   }
 
+  /// Returns true if \p I is already marked for recording.
+  bool recipeAlreadyRecorded(Instruction *I) const {
+    return Ingredient2Recipe.find(I) != Ingredient2Recipe.end();
+  }
+
   /// Return the recipe created for given ingredient.
   VPRecipeBase *getRecipe(Instruction *I) {
     assert(Ingredient2Recipe.count(I) &&
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -971,3 +971,39 @@
 exit:
   ret void
 }
+
+; Similar to @test_optimized_cast_induction_feeding_first_order_recurrence, but
+; with different phi order, which impacts the order recipes are created and
+; recorded.
+define void @test_optimized_cast_induction_feeding_first_order_recurrence_2(i64 %n, i32 %step, i32* %ptr) {
+; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence_2(
+; CHECK-LABEL: vector.body:
+; CHECK-NEXT:    [[MAIN_IV:%.+]] = phi i64 [ 0, %vector.ph ], [ [[MAIN_IV_NEXT:%.+]], %vector.body ]
+; CHECK-NEXT:    [[VEC_RECUR:%.+]] = phi <2 x i32> [ <i32 poison, i32 0>, %vector.ph ], [ [[VEC_IV:%.+]], %vector.body ]
+; CHECK-NEXT:    [[VEC_IV]] = phi <2 x i32> [ %induction, %vector.ph ], [ [[VEC_IV_NEXT:%.+]], %vector.body ]
+; CHECK-NEXT:    [[MAIN_IV_0:%.+]] = add i64 [[MAIN_IV]], 0
+; CHECK-NEXT:    [[RECUR_SHUFFLE:%.+]] = shufflevector <2 x i32> [[VEC_RECUR]], <2 x i32> [[VEC_IV]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[GEP0:%.+]] = getelementptr inbounds i32, i32* %ptr, i64 [[MAIN_IV_0]]
+; CHECK-NEXT:    [[GEP1:%.+]] = getelementptr inbounds i32, i32* [[GEP0]], i32 0
+; CHECK-NEXT:    [[GEP_CAST:%.+]] = bitcast i32* [[GEP1]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[RECUR_SHUFFLE]], <2 x i32>* [[GEP_CAST]], align 4
+; CHECK-NEXT:    [[MAIN_IV_NEXT]] = add nuw i64 [[MAIN_IV]], 2
+; CHECK-NEXT:    [[VEC_IV_NEXT]] = add <2 x i32> [[VEC_IV]],
+;
+entry:
+  br label %loop
+
+loop:
+  %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ]
+  %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ]
+  %for = phi i32 [ 0, %entry ], [ %iv.2, %loop ]
+  %gep = getelementptr inbounds i32, i32* %ptr, i64 %iv.1
+  store i32 %for, i32* %gep, align 4
+  %iv.2.next = add nsw i32 %iv.2, %step
+  %iv.1.next = add nuw nsw i64 %iv.1, 1
+  %exitcond = icmp eq i64 %iv.1.next, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}