diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -480,7 +480,7 @@ VPTransformState &State); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. - void fixVectorizedLoop(VPTransformState &State); + void fixVectorizedLoop(VPTransformState &State, VPlan &Plan); // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } @@ -3646,7 +3646,8 @@ } } -void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { +void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, + VPlan &Plan) { // Insert truncates and extends for any truncated instructions as hints to // InstCombine. if (VF.isVector()) @@ -3668,7 +3669,8 @@ // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); - Loop *VectorLoop = LI->getLoopFor(State.CFG.PrevBB); + VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitBasicBlock(); + Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); // If we inserted an edge from the middle block to the unique exit block, // update uses outside the loop (phis) to account for the newly inserted // edge. @@ -3864,8 +3866,10 @@ setDebugLocFromInst(LoopExitInst); Type *PhiTy = OrigPhi->getType(); - BasicBlock *VectorLoopLatch = - LI->getLoopFor(State.CFG.PrevBB)->getLoopLatch(); + + VPBasicBlock *LatchVPBB = + PhiR->getParent()->getEnclosingLoopRegion()->getExitBasicBlock(); + BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB]; // If tail is folded by masking, the vector value to leave the loop should be // a Select choosing between the vectorized LoopExitInst and vectorized Phi, // instead of the former. For an inloop reduction the reduction will already @@ -7628,7 +7632,9 @@ makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, LLVMLoopVectorizeFollowupVectorized}); - Loop *L = LI->getLoopFor(State.CFG.PrevBB); + VPBasicBlock *HeaderVPBB = + BestVPlan.getVectorLoopRegion()->getEntryBasicBlock(); + Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]); if (VectorizedLoopID.hasValue()) L->setLoopID(VectorizedLoopID.getValue()); else { @@ -7646,7 +7652,7 @@ // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. - ILV.fixVectorizedLoop(State); + ILV.fixVectorizedLoop(State, BestVPlan); ILV.printDebugTracesAtEnd(); } @@ -8750,8 +8756,8 @@ // --------------------------------------------------------------------------- // Create initial VPlan skeleton, starting with a block for the pre-header, - // followed by a region for the vector loop. The skeleton vector loop region - // contains a header and latch block. + // followed by a region for the vector loop, followed by the middle block. The + // skeleton vector loop region contains a header and latch block. VPBasicBlock *Preheader = new VPBasicBlock("vector.ph"); auto Plan = std::make_unique(Preheader); @@ -8760,6 +8766,8 @@ VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); VPBlockUtils::insertBlockAfter(TopRegion, Preheader); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); Instruction *DLInst = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -325,27 +325,33 @@ return R && !R->isReplicator(); }; - // 1. Create an IR basic block, or reuse the last one if possible. - // The last IR basic block is reused, as an optimization, in three cases: - // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; - // B. when the current VPBB has a single (hierarchical) predecessor which - // is PrevVPBB and the latter has a single (hierarchical) successor which - // both are in the same non-replicator region; and - // C. when the current VPBB is an entry of a region replica - where PrevVPBB - // is the exit of this region from a previous instance, or the predecessor - // of this region. - if (PrevVPBB && /* A */ - !((SingleHPred = getSingleHierarchicalPredecessor()) && - SingleHPred->getExitBasicBlock() == PrevVPBB && - PrevVPBB->getSingleHierarchicalSuccessor() && - (SingleHPred->getParent() == getEnclosingLoopRegion() && - !IsNonReplicateR(SingleHPred))) && /* B */ - !(Replica && getPredecessors().empty())) { /* C */ + // 1. Create an IR basic block, or reuse the last one or ExitBB if possible. + if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) { + // ExitBB can be re-used for the exit block of the Plan. + NewBB = State->CFG.ExitBB; + State->CFG.PrevBB = NewBB; + } else if (PrevVPBB && /* A */ + !((SingleHPred = getSingleHierarchicalPredecessor()) && + SingleHPred->getExitBasicBlock() == PrevVPBB && + PrevVPBB->getSingleHierarchicalSuccessor() && + (SingleHPred->getParent() == getEnclosingLoopRegion() && + !IsNonReplicateR(SingleHPred))) && /* B */ + !(Replica && getPredecessors().empty())) { /* C */ + // The last IR basic block is reused, as an optimization, in three cases: + // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; + // B. when the current VPBB has a single (hierarchical) predecessor which + // is PrevVPBB and the latter has a single (hierarchical) successor which + // both are in the same non-replicator region; and + // C. when the current VPBB is an entry of a region replica - where PrevVPBB + // is the exit of this region from a previous instance, or the + // predecessor of this region. + NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); // Temporarily terminate with unreachable until CFG is rewired. UnreachableInst *Terminator = State->Builder.CreateUnreachable(); - // Register NewBB in its loop. In innermost loops its the same for all BB's. + // Register NewBB in its loop. In innermost loops its the same for all + // BB's. if (State->CurrentVectorLoop) State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI); State->Builder.SetInsertPoint(Terminator); @@ -969,7 +975,8 @@ } } - BasicBlock *VectorLatchBB = State->CFG.PrevBB; + VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitBasicBlock(); + BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; // Fix the latch value of canonical, reduction and first-order recurrences // phis in the vector loop. diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -72,6 +72,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -145,6 +148,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -215,6 +221,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -312,6 +321,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -407,6 +419,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -27,6 +27,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -73,6 +76,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -117,6 +123,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -184,6 +193,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -243,6 +255,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; @@ -300,6 +315,11 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: No successors +; CHECK-NEXT:} entry: br label %for.body @@ -376,7 +396,10 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT:} -; CHECK-NEXT:No successors +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: +; CHECK-NEXT: No successors ; CHECK-NEXT:} ; entry: @@ -435,6 +458,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_INC]]> vp<%0> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ; diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll @@ -49,6 +49,9 @@ ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]> vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): middle.block +; CHECK-EMPTY: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } ;