diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9192,52 +9192,61 @@
     auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
       auto *Region =
           dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
-      if (Region && Region->isReplicator())
+      if (Region && Region->isReplicator()) {
+        assert(Region->getNumSuccessors() == 1 &&
+               Region->getNumPredecessors() == 1 && "Expected SESE region!");
+        assert(R->getParent()->size() == 1 &&
+               "A recipe in an original replicator region must be the only "
+               "recipe in its block");
         return Region;
+      }
       return nullptr;
     };
-
-    // If the target is in a replication region, make sure to move Sink to the
-    // block after it, not into the replication region itself.
-    if (auto *TargetRegion = GetReplicateRegion(Target)) {
-      assert(TargetRegion->getNumSuccessors() == 1 && "Expected SESE region!");
-      assert(!GetReplicateRegion(Sink) &&
-             "cannot sink a region into another region yet");
-      VPBasicBlock *NextBlock =
-          cast<VPBasicBlock>(TargetRegion->getSuccessors().front());
-      Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi());
-      continue;
-    }
-
+    auto *TargetRegion = GetReplicateRegion(Target);
     auto *SinkRegion = GetReplicateRegion(Sink);
-    // Unless the sink source is in a replicate region, sink the recipe
-    // directly.
     if (!SinkRegion) {
-      Sink->moveAfter(Target);
+      // The sink source is not in a replicate region, sink the recipe directly.
+      if (TargetRegion) {
+        // The target is in a replication region, make sure to move Sink to
+        // the block after it, not into the replication region itself.
+        VPBasicBlock *NextBlock =
+            cast<VPBasicBlock>(TargetRegion->getSuccessors().front());
+        Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi());
+      } else {
+        Sink->moveAfter(Target);
+      }
       continue;
     }
 
-    // If the sink source is in a replicate region, we need to move the whole
-    // replicate region, which should only contain a single recipe in the main
-    // block.
-    assert(Sink->getParent()->size() == 1 &&
-           "parent must be a replicator with a single recipe");
-    auto *SplitBlock =
-        Target->getParent()->splitAt(std::next(Target->getIterator()));
-
-    auto *Pred = SinkRegion->getSinglePredecessor();
-    auto *Succ = SinkRegion->getSingleSuccessor();
-    VPBlockUtils::disconnectBlocks(Pred, SinkRegion);
-    VPBlockUtils::disconnectBlocks(SinkRegion, Succ);
-    VPBlockUtils::connectBlocks(Pred, Succ);
-
-    auto *SplitPred = SplitBlock->getSinglePredecessor();
-
-    VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
-    VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
-    VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
-    if (VPBB == SplitPred)
-      VPBB = SplitBlock;
+    // The sink source is in a replicate region. Unhook the region from the CFG.
+    auto *SinkPred = SinkRegion->getSinglePredecessor();
+    auto *SinkSucc = SinkRegion->getSingleSuccessor();
+    VPBlockUtils::disconnectBlocks(SinkPred, SinkRegion);
+    VPBlockUtils::disconnectBlocks(SinkRegion, SinkSucc);
+    VPBlockUtils::connectBlocks(SinkPred, SinkSucc);
+
+    if (TargetRegion) {
+      // The target recipe is also in a replicate region, move the sink region
+      // after the target region.
+      auto *TargetSucc = TargetRegion->getSingleSuccessor();
+      VPBlockUtils::disconnectBlocks(TargetRegion, TargetSucc);
+      VPBlockUtils::connectBlocks(TargetRegion, SinkRegion);
+      VPBlockUtils::connectBlocks(SinkRegion, TargetSucc);
+    } else {
+      // The target is not in a replicate region: split its block right after
+      // Target and hook the whole sink region, which holds a single recipe in
+      // its main block, in between the two halves.
+      auto *SplitBlock =
+          Target->getParent()->splitAt(std::next(Target->getIterator()));
+
+      auto *SplitPred = SplitBlock->getSinglePredecessor();
+
+      VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
+      VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
+      VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
+      if (VPBB == SplitPred)
+        VPBB = SplitBlock;
+    }
   }
 
   // Interleave memory: for each Interleave Group we marked earlier as relevant
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -disable-output -debug-only=loop-vectorize 2>&1 | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 
@@ -306,3 +306,92 @@
 exit:
   ret void
 }
+
+; Test case that requires sinking a recipe in a replicate region after another replicate region.
+define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 %y) optsize { ; NOTE(review): optsize presumably makes the vectorizer fold the tail, hence the BRANCH-ON-MASK regions below - confirm
+; CHECK-LABEL: sink_replicate_region_after_replicate_region
+; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: loop:
+; CHECK-NEXT:   WIDEN-PHI %recur = phi 0, %recur.next
+; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
+; CHECK-NEXT:   EMIT vp<%3> = icmp ule ir<%iv> vp<%0>
+; CHECK-NEXT: Successor(s): loop.0
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.0:
+; CHECK-NEXT: Successor(s): loop.1
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.1:
+; CHECK-NEXT:   WIDEN ir<%recur.next> = sext ir<%y>
+; CHECK-NEXT: Successor(s): pred.srem
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.srem: {
+; CHECK-NEXT:   pred.srem.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.srem.if, pred.srem.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+; CHECK-EMPTY:
+; CHECK-NEXT:   pred.srem.if:
+; CHECK-NEXT:     REPLICATE ir<%rem> = srem ir<%recur>, ir<%x>
+; CHECK-NEXT:   Successor(s): pred.srem.continue
+; CHECK-EMPTY:
+; CHECK-NEXT:   pred.srem.continue:
+; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:      pred.sdiv: {
+; CHECK-NEXT:   pred.sdiv.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.sdiv.if, pred.sdiv.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+; CHECK-EMPTY:
+; CHECK-NEXT:   pred.sdiv.if:
+; CHECK-NEXT:     REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6>
+; CHECK-NEXT:   Successor(s): pred.sdiv.continue
+; CHECK-EMPTY:
+; CHECK-NEXT:   pred.sdiv.continue:
+; CHECK-NEXT:     PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem.div>
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): loop.1.split
+; CHECK-EMPTY:
+; CHECK-NEXT: loop.1.split:
+; CHECK-NEXT: Successor(s): pred.store
+; CHECK-EMPTY:
+; CHECK-NEXT: pred.store: {
+; CHECK-NEXT:   pred.store.entry:
+; CHECK-NEXT:     BRANCH-ON-MASK vp<%3>
+; CHECK-NEXT:   Successor(s): pred.store.if, pred.store.continue
+; CHECK-NEXT:   CondBit: vp<%3> (loop)
+; CHECK-EMPTY:
+; CHECK-NEXT:   pred.store.if:
+; CHECK-NEXT:     REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv>
+; CHECK-NEXT:     REPLICATE store vp<%8>, ir<%gep>
+; CHECK-NEXT:   Successor(s): pred.store.continue
+; CHECK-EMPTY:
+; CHECK-NEXT:   pred.store.continue:
+; CHECK-NEXT:   No successors
+; CHECK-NEXT: }
+
+; CHECK:      loop.2:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+  br label %loop
+
+loop:                                             ; preds = %loop, %entry
+  %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] ; carries %recur.next over from the previous iteration
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %rem = srem i32 %recur, %x                      ; reads %recur ahead of the %recur.next definition below
+  %rem.div = sdiv i32 20, %rem                    ; consumes %rem, so it has to stay behind the srem
+  %recur.next = sext i8 %y to i32                 ; expected plan widens this in loop.1, ahead of the pred.srem region
+  %gep = getelementptr i32, i32* %ptr, i32 %iv
+  store i32 %rem.div, i32* %gep                   ; expected plan keeps this in pred.store, behind pred.sdiv
+  %iv.next = add nsw i32 %iv, 1
+  %C = icmp sgt i32 %iv.next, %recur.next
+  br i1 %C, label %exit, label %loop
+
+exit:                                             ; preds = %loop
+  ret void
+}