diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8785,13 +8785,17 @@ return VPBB; } LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); - assert(VPBB->getSuccessors().empty() && - "VPBB has successors when handling predicated replication."); + + VPBlockBase *SingleSucc = VPBB->getSingleSuccessor(); + assert(SingleSucc && "VPBB must have a single successor when handling " + "predicated replication."); + VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); // Record predicated instructions for above packing optimizations. VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); + VPBlockUtils::connectBlocks(RegSucc, SingleSucc); return RegSucc; } @@ -9012,29 +9016,33 @@ // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- + // Create initial VPlan skeleton, with separate header and latch blocks. auto Plan = std::make_unique<VPlan>(); + VPBasicBlock *HeaderVPBB = new VPBasicBlock(OrigLoop->getHeader()->getName()); + auto *TopRegion = new VPRegionBlock("vector loop"); + TopRegion->setEntry(HeaderVPBB); + Plan->setEntry(TopRegion); + VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); + TopRegion->setExit(LatchVPBB); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. 
LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); - VPBasicBlock *VPBB = nullptr; - VPBasicBlock *HeaderVPBB = nullptr; + VPBasicBlock *VPBB = HeaderVPBB; + bool FirstIter = true; SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove; for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - if (VPBB) + auto *FirstVPBBForBB = FirstIter ? VPBB : new VPBasicBlock(BB->getName()); + if (!FirstIter) VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); - else { - auto *TopRegion = new VPRegionBlock("vector loop"); - TopRegion->setEntry(FirstVPBBForBB); - Plan->setEntry(TopRegion); - HeaderVPBB = FirstVPBBForBB; - } + else + FirstIter = false; VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); @@ -9178,8 +9186,6 @@ } } - cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB); - VPlanTransforms::removeRedundantInductionCasts(*Plan); // Now that sink-after is done, move induction recipes for optimized truncates @@ -9268,6 +9274,18 @@ RSO.flush(); Plan->setName(PlanName); + // Fold Exit block into its predecessor if possible. 
+ auto *LatchPred = dyn_cast<VPBasicBlock>(LatchVPBB->getSinglePredecessor()); + if (LatchPred) { + assert(LatchPred->getSingleSuccessor() == LatchVPBB && + "latch must be executed unconditionally from LatchPred"); + for (VPRecipeBase &R : make_early_inc_range(*LatchVPBB)) + R.moveBefore(*LatchPred, LatchPred->end()); + VPBlockUtils::disconnectBlocks(LatchPred, LatchVPBB); + TopRegion->setExit(LatchPred); + delete LatchVPBB; + } + assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2365,12 +2365,15 @@ static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { assert(NewBlock->getSuccessors().empty() && "Can't insert new block with successors."); - // TODO: move successors from BlockPtr to NewBlock when this functionality - // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr - // already has successors. + NewBlock->setParent(BlockPtr->getParent()); + SmallVector<VPBlockBase *> Succs(BlockPtr->getSuccessors().begin(), + BlockPtr->getSuccessors().end()); + for (VPBlockBase *Succ : Succs) { + disconnectBlocks(BlockPtr, Succ); + connectBlocks(NewBlock, Succ); + } BlockPtr->setOneSuccessor(NewBlock); NewBlock->setPredecessors({BlockPtr}); - NewBlock->setParent(BlockPtr->getParent()); } /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p