Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8598,6 +8598,21 @@
   for (auto &Entry : SinkAfter) {
     VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
     VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
+    // If the target is in a replication region, make sure to move Sink to the
+    // block after it, not into the replication region itself.
+    if (auto *Region =
+            dyn_cast_or_null<VPRegionBlock>(Target->getParent()->getParent())) {
+      if (Region->isReplicator()) {
+        assert(Region->getNumSuccessors() == 1 && "Expected SESE region!");
+        VPBasicBlock *NextBlock =
+            cast<VPBasicBlock>(Region->getSuccessors().front());
+        // Re-insert Sink at the very start of the region's single successor.
+        Sink->removeFromParent();
+        NextBlock->insert(Sink, NextBlock->begin());
+        // Sink is already placed; skip the default moveAfter(Target) below.
+        continue;
+      }
+    }
     Sink->moveAfter(Target);
   }
 
Index: llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -645,3 +645,26 @@
 for.end:
   ret void
 }
+
+; CHECK-LABEL: sink_into_replication_region
+; CHECK: <4 x i32>
+define i32 @sink_into_replication_region() {
+bb:
+  br label %bb2
+
+ bb1: ; preds = %bb2
+  %tmp = phi i32 [ %tmp6, %bb2 ]
+  ret i32 %tmp
+
+ bb2: ; preds = %bb2, %bb
+  %tmp3 = phi i32 [ %tmp8, %bb2 ], [ undef, %bb ]
+  %tmp4 = phi i32 [ %tmp7, %bb2 ], [ undef, %bb ]
+  %tmp5 = phi i32 [ %tmp6, %bb2 ], [ undef, %bb ]
+  %tmp6 = add i32 %tmp5, %tmp4
+  %tmp7 = udiv i32 219220132, %tmp3
+  %tmp8 = add nsw i32 %tmp3, -1
+  %tmp9 = icmp slt i32 %tmp3, 2
+  br i1 %tmp9, label %bb1, label %bb2, !prof !2
+}
+
+!2 = !{!"branch_weights", i32 1, i32 1}