Patch summary (LoopVectorize.cpp, VPlan.h):
  * The top-level "vector loop" VPRegionBlock is now built up front from a
    header VPBasicBlock and a separate "vector.latch" VPBasicBlock, instead of
    being created inside the RPO walk over the original loop's blocks.
  * Predicated replication now requires VPBB to have exactly one successor and
    re-links that successor behind the newly created RegSucc block.
  * VPBlockUtils::insertBlockAfter moves BlockPtr's existing successors over to
    NewBlock instead of requiring BlockPtr to have none.
  * Once the plan is named, the latch block is folded into its single
    predecessor when that predecessor is a VPBasicBlock.

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8790,13 +8790,17 @@
     return VPBB;
   }
   LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n");
-  assert(VPBB->getSuccessors().empty() &&
-         "VPBB has successors when handling predicated replication.");
+
+  VPBlockBase *SingleSucc = VPBB->getSingleSuccessor();
+  assert(SingleSucc && "VPBB must have a single successor when handling "
+                       "predicated replication.");
+  VPBlockUtils::disconnectBlocks(VPBB, SingleSucc);
   // Record predicated instructions for above packing optimizations.
   VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan);
   VPBlockUtils::insertBlockAfter(Region, VPBB);
   auto *RegSucc = new VPBasicBlock();
   VPBlockUtils::insertBlockAfter(RegSucc, Region);
+  VPBlockUtils::connectBlocks(RegSucc, SingleSucc);
   return RegSucc;
 }
 
@@ -9017,30 +9021,32 @@
   // visit each basic block after having visited its predecessor basic blocks.
   // ---------------------------------------------------------------------------
 
-  auto Plan = std::make_unique<VPlan>();
+  // Create initial VPlan skeleton, with separate header and latch blocks.
+  VPBasicBlock *HeaderVPBB = new VPBasicBlock(OrigLoop->getHeader()->getName());
+  VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
+  VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
+  auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
+  auto Plan = std::make_unique<VPlan>(TopRegion);
 
   // Scan the body of the loop in a topological order to visit each basic block
   // after having visited its predecessor basic blocks.
   LoopBlocksDFS DFS(OrigLoop);
   DFS.perform(LI);
 
-  VPBasicBlock *VPBB = nullptr;
-  VPBasicBlock *HeaderVPBB = nullptr;
+  VPBasicBlock *VPBB = HeaderVPBB;
+  bool FillHeaderVPBB = true;
   SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
     unsigned VPBBsForBB = 0;
-    auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
-    if (VPBB)
-      VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+    if (FillHeaderVPBB)
+      FillHeaderVPBB = false;
     else {
-      auto *TopRegion = new VPRegionBlock("vector loop");
-      TopRegion->setEntry(FirstVPBBForBB);
-      Plan->setEntry(TopRegion);
-      HeaderVPBB = FirstVPBBForBB;
+      auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
+      VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+      VPBB = FirstVPBBForBB;
     }
-    VPBB = FirstVPBBForBB;
     Builder.setInsertPoint(VPBB);
 
     // Introduce each ingredient into VPlan.
@@ -9183,8 +9189,6 @@
     }
   }
 
-  cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB);
-
   VPlanTransforms::removeRedundantInductionCasts(*Plan);
 
   // Now that sink-after is done, move induction recipes for optimized truncates
@@ -9273,6 +9277,18 @@
   RSO.flush();
   Plan->setName(PlanName);
 
+  // Fold Exit block into its predecessor if possible.
+  auto *LatchPred = dyn_cast<VPBasicBlock>(LatchVPBB->getSinglePredecessor());
+  if (LatchPred) {
+    assert(LatchPred->getSingleSuccessor() == LatchVPBB &&
+           "latch must be executed unconditionally from LatchPred");
+    for (VPRecipeBase &R : make_early_inc_range(*LatchVPBB))
+      R.moveBefore(*LatchPred, LatchPred->end());
+    VPBlockUtils::disconnectBlocks(LatchPred, LatchVPBB);
+    TopRegion->setExit(LatchPred);
+    delete LatchVPBB;
+  }
+
   assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
   return Plan;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2358,12 +2358,14 @@
   static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
     assert(NewBlock->getSuccessors().empty() &&
            "Can't insert new block with successors.");
-    // TODO: move successors from BlockPtr to NewBlock when this functionality
-    // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr
-    // already has successors.
+    NewBlock->setParent(BlockPtr->getParent());
+    SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
+    for (VPBlockBase *Succ : Succs) {
+      disconnectBlocks(BlockPtr, Succ);
+      connectBlocks(NewBlock, Succ);
+    }
     BlockPtr->setOneSuccessor(NewBlock);
     NewBlock->setPredecessors({BlockPtr});
-    NewBlock->setParent(BlockPtr->getParent());
   }
 
   /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p