diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9192,15 +9192,40 @@ return nullptr; }; - // If the target is in a replication region, make sure to move Sink to the - // block after it, not into the replication region itself. if (auto *TargetRegion = GetReplicateRegion(Target)) { - assert(TargetRegion->getNumSuccessors() == 1 && "Expected SESE region!"); - assert(!GetReplicateRegion(Sink) && - "cannot sink a region into another region yet"); - VPBasicBlock *NextBlock = - cast(TargetRegion->getSuccessors().front()); - Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi()); + assert(TargetRegion->getNumSuccessors() == 1 && + TargetRegion->getNumPredecessors() == 1 && + "Expected SESE region!"); + assert(Target->getParent()->size() == 1 && + "Target must be in a replicator with a single recipe"); + + if (auto *SinkRegion = GetReplicateRegion(Sink)) { + // If the recipe to sink is also in a recpliate region, move the sink + // region after the target region. + assert(SinkRegion->getNumSuccessors() == 1 && + SinkRegion->getNumPredecessors() == 1 && + "Expected SESE region!"); + assert(Sink->getParent()->size() == 1 && + "Sink must be in a replicator with a single recipe"); + + auto *SinkPred = SinkRegion->getSinglePredecessor(); + auto *SinkSucc = SinkRegion->getSingleSuccessor(); + VPBlockUtils::disconnectBlocks(SinkPred, SinkRegion); + VPBlockUtils::disconnectBlocks(SinkRegion, SinkSucc); + VPBlockUtils::connectBlocks(SinkPred, SinkSucc); + + auto *TargetSucc = TargetRegion->getSingleSuccessor(); + VPBlockUtils::disconnectBlocks(TargetRegion, TargetSucc); + VPBlockUtils::connectBlocks(TargetRegion, SinkRegion); + VPBlockUtils::connectBlocks(SinkRegion, TargetSucc); + } else { + // If the target is in a replication region, make sure to move Sink to + // the block after it, not into the replication region itself. + + VPBasicBlock *NextBlock = + cast(TargetRegion->getSuccessors().front()); + Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi()); + } continue; } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize 2>&1 | FileCheck %s +; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -disable-output -debug-only=loop-vectorize 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" @@ -306,3 +306,92 @@ exit: ret void } + +; Test case that requires sinking a recipe in a replicate region after another replicate region. +define void @sink_replicate_region_after_replicate_region(i32* %ptr, i32 %x, i8 %y) optsize { +; CHECK-LABEL: sink_replicate_region_after_replicate_region +; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: loop: +; CHECK-NEXT: WIDEN-PHI %recur = phi 0, %recur.next +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next +; CHECK-NEXT: EMIT vp<%3> = icmp ule ir<%iv> vp<%0> +; CHECK-NEXT: Successor(s): loop.0 +; CHECK-EMPTY: +; CHECK-NEXT: loop.0: +; CHECK-NEXT: Successor(s): loop.1 +; CHECK-EMPTY: +; CHECK-NEXT: loop.1: +; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y> +; CHECK-NEXT: Successor(s): pred.srem +; CHECK-EMPTY: +; CHECK-NEXT: pred.srem: { +; CHECK-NEXT: pred.srem.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<%3> +; CHECK-NEXT: Successor(s): pred.srem.if, pred.srem.continue +; CHECK-NEXT: CondBit: vp<%3> (loop) +; CHECK-EMPTY: +; CHECK-NEXT: pred.srem.if: +; CHECK-NEXT: REPLICATE ir<%rem> = srem ir<%recur>, ir<%x> +; CHECK-NEXT: Successor(s): pred.srem.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.srem.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%rem> +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +; CHECK: pred.sdiv: { +; CHECK-NEXT: pred.sdiv.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<%3> +; CHECK-NEXT: Successor(s): pred.sdiv.if, pred.sdiv.continue +; CHECK-NEXT: CondBit: vp<%3> (loop) +; CHECK-EMPTY: +; CHECK-NEXT: pred.sdiv.if: +; CHECK-NEXT: REPLICATE ir<%rem.div> = sdiv ir<20>, vp<%6> +; CHECK-NEXT: Successor(s): pred.sdiv.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.sdiv.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%rem.div> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): loop.1.split +; CHECK-EMPTY: +; CHECK-NEXT: loop.1.split: +; CHECK-NEXT: Successor(s): pred.store +; CHECK-EMPTY: +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<%3> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-NEXT: CondBit: vp<%3> (loop) +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, ir<%iv> +; CHECK-NEXT: REPLICATE store vp<%8>, ir<%gep> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +; CHECK: loop.2: +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %recur = phi i32 [ 0, %entry ], [ %recur.next, %loop ] + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %rem = srem i32 %recur, %x + %rem.div = sdiv i32 20, %rem + %recur.next = sext i8 %y to i32 + %gep = getelementptr i32, i32* %ptr, i32 %iv + store i32 %rem.div, i32* %gep + %iv.next = add nsw i32 %iv, 1 + %C = icmp sgt i32 %iv.next, %recur.next + br i1 %C, label %exit, label %loop + +exit: ; preds = %loop + ret void +}