diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8754,13 +8754,12 @@ VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), *PSE.getSE()); + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + Plan->getVectorLoopRegion()->setEntry(HeaderVPBB); + Plan->getVectorLoopRegion()->setExiting(LatchVPBB); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -8878,8 +8877,7 @@ // --------------------------------------------------------------------------- // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(cast(TopRegion->getExiting()), Plan, - RecipeBuilder, Range.Start); + adjustRecipesForReductions(LatchVPBB, Plan, RecipeBuilder, Range.Start); // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2542,8 +2542,10 @@ ~VPlan(); - /// Create an initial VPlan with preheader and entry blocks. Creates a - /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count. + /// Create initial VPlan skeleton, having a VPBasicBlock for the pre-header + /// which contains SCEV expansions that need to happen before the CFG is + /// modified; a VPbasicBlock for the vector pre-header, followed by a region + /// for the vector loop, followed by the middle VPBasicBlock. static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -718,6 +718,11 @@ auto Plan = std::make_unique(Preheader, VecPreheader); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + // Create empty VPRegionBlock, to be filled during processing later. + auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/); + VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -83,6 +83,9 @@ // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB // must have no predecessors. void PlainCFGBuilder::setVPBBPredsFromBB(VPBlockBase *VPBB, BasicBlock *BB) { + // Block already has predecessors set, skip it. + if (VPBB->getNumPredecessors() != 0) + return; SmallVector VPBBPreds; // If VPBB is a region block, we are dealing with a loop header block. Connect @@ -289,6 +292,19 @@ // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG() { + // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the + // skeleton. + VPRegionBlock *TheRegion = Plan.getVectorLoopRegion(); + Loop2Region[TheLoop] = TheRegion; + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + VPBasicBlock *VectorPreheaderVPBB = Plan.getEntry(); + BB2VPB[ThePreheaderBB] = VectorPreheaderVPBB; + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + BB2VPB[LoopExitBB] = cast(TheRegion->getSingleSuccessor()); + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that @@ -298,21 +314,15 @@ // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); - BB2VPB[ThePreheaderBB] = ThePreheaderVPBB; - ThePreheaderVPBB->setName("vector.ph"); + VectorPreheaderVPBB->setName("vector.ph"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create empty VPBB for Loop H so that we can link PH->H. - VPBlockBase *HeaderVPBB = getOrCreateVPB(TheLoop->getHeader()); - getOrCreateVPBB(TheLoop->getHeader())->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB); + // Create empty VPBB for header block of the top region and set its name. + VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); + HeaderVPBB->setName("vector.body"); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -335,6 +345,7 @@ assert(SuccVPBB && "VPBB Successor not found."); VPBB->setOneSuccessor(SuccVPBB); } else if (NumSuccs == 2) { + VPBlockBase *SuccVPBB0 = getOrCreateVPB(TI->getSuccessor(0)); assert(SuccVPBB0 && "Successor 0 not found."); VPBlockBase *SuccVPBB1 = getOrCreateVPB(TI->getSuccessor(1)); @@ -369,17 +380,7 @@ setVPBBPredsFromBB(getOrCreateVPB(BB), BB); } - // 2. Process outermost loop exit. We created an empty VPBB for the loop - // single exit BB during the RPO traversal of the loop body but Instructions - // weren't visited because it's not part of the the loop. - BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); - assert(LoopExitBB && "Loops with multiple exits are not supported."); - VPBasicBlock *LoopExitVPBB = getOrCreateVPBB(LoopExitBB); - // Loop exit was already set as successor of the loop exiting BB. - // We only set its predecessor VPBB now. - setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - - // 3. The whole CFG has been built at this point so all the input Values must + // 2. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. fixPhiNodes(); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: -; CHECK-NEXT: outer.header: { +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> @@ -39,9 +39,9 @@ ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): exit +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: exit: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: