diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8790,13 +8790,17 @@ return VPBB; } LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); - assert(VPBB->getSuccessors().empty() && - "VPBB has successors when handling predicated replication."); + + VPBlockBase *SingleSucc = VPBB->getSingleSuccessor(); + assert(SingleSucc && "VPBB must have a single successor when handling " + "predicated replication."); + VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); // Record predicated instructions for above packing optimizations. VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); + VPBlockUtils::connectBlocks(RegSucc, SingleSucc); return RegSucc; } @@ -9017,31 +9021,28 @@ // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - auto Plan = std::make_unique(); + // Create initial VPlan skeleton, with separate header and latch blocks. + VPBasicBlock *HeaderVPBB = new VPBasicBlock(OrigLoop->getHeader()->getName()); + VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); + auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); + auto Plan = std::make_unique(TopRegion); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); - VPBasicBlock *VPBB = nullptr; - VPBasicBlock *HeaderVPBB = nullptr; + VPBasicBlock *VPBB = HeaderVPBB; SmallVector InductionsToMove; + unsigned NumBBsProcessed = 0; for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - if (VPBB) - VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); - else { - auto *TopRegion = new VPRegionBlock("vector loop"); - TopRegion->setEntry(FirstVPBBForBB); - Plan->setEntry(TopRegion); - HeaderVPBB = FirstVPBBForBB; - } - VPBB = FirstVPBBForBB; + VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); + ++NumBBsProcessed; // Introduce each ingredient into VPlan. // TODO: Model and preserve debug instrinsics in VPlan. @@ -9107,6 +9108,14 @@ : ""); } } + + // Avoid creating a new block if all blocks in the loop have been processed + // (= the last iteration of the loop). + if (NumBBsProcessed != OrigLoop->getNumBlocks()) { + auto *NextVPBBForBB = new VPBasicBlock("next.vpbb"); + VPBlockUtils::insertBlockAfter(NextVPBBForBB, VPBB); + VPBB = NextVPBBForBB; + } } assert(isa(Plan->getEntry()) && @@ -9183,8 +9192,6 @@ } } - cast(Plan->getEntry())->setExit(VPBB); - VPlanTransforms::removeRedundantInductionCasts(*Plan); // Now that sink-after is done, move induction recipes for optimized truncates @@ -9273,6 +9280,9 @@ RSO.flush(); Plan->setName(PlanName); + // Fold Exit block into its predecessor if possible. + VPBlockUtils::tryToMergeBlockIntoPredecessor(LatchVPBB); + assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2353,17 +2353,22 @@ /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr - /// has more than one successor, its conditional bit is propagated to \p - /// NewBlock. \p NewBlock must have neither successors nor predecessors. + /// has more than one successor, those successors moved from \p BlockPtr to \p + /// NewBlock and \p BlockPtr's conditional bit is propagated to \p NewBlock. + /// \p NewBlock must have neither successors nor predecessors. static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { assert(NewBlock->getSuccessors().empty() && - "Can't insert new block with successors."); - // TODO: move successors from BlockPtr to NewBlock when this functionality - // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr - // already has successors. - BlockPtr->setOneSuccessor(NewBlock); - NewBlock->setPredecessors({BlockPtr}); + NewBlock->getPredecessors().empty() && + "Can't insert new block with predecessors or successors."); NewBlock->setParent(BlockPtr->getParent()); + SmallVector Succs(BlockPtr->successors()); + for (VPBlockBase *Succ : Succs) { + disconnectBlocks(BlockPtr, Succ); + connectBlocks(NewBlock, Succ); + } + NewBlock->setCondBit(BlockPtr->getCondBit()); + BlockPtr->setCondBit(nullptr); + connectBlocks(BlockPtr, NewBlock); } /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p @@ -2406,6 +2411,27 @@ To->removePredecessor(From); } + /// Try to merge \p Block into its single predecessor, if \p Block is a + /// VPBasicBlock and its predecessor has a single successor. Returns true if + /// \p Block has been merged and removed or false otherwise. + static bool tryToMergeBlockIntoPredecessor(VPBlockBase *Block) { + auto *VPBB = dyn_cast(Block); + auto *PredVPBB = dyn_cast(Block->getSinglePredecessor()); + if (!VPBB || !PredVPBB || !PredVPBB->getSingleSuccessor()) + return false; + + for (VPRecipeBase &R : make_early_inc_range(*VPBB)) + R.moveBefore(*PredVPBB, PredVPBB->end()); + VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); + if (VPlan *Plan = Block->getPlan()) { + auto *TopRegion = cast(Plan->getEntry()); + if (TopRegion->getExit() == Block) + TopRegion->setExit(PredVPBB); + } + delete Block; + return true; + } + /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge. static bool isBackEdge(const VPBlockBase *FromBlock, const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) {