diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8788,13 +8788,12 @@ VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), *PSE.getSE()); + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + Plan->getVectorLoopRegion()->setEntry(HeaderVPBB); + Plan->getVectorLoopRegion()->setExiting(LatchVPBB); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -8912,8 +8911,7 @@ // --------------------------------------------------------------------------- // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(cast(TopRegion->getExiting()), Plan, - RecipeBuilder, Range.Start); + adjustRecipesForReductions(LatchVPBB, Plan, RecipeBuilder, Range.Start); // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2539,8 +2539,10 @@ ~VPlan(); - /// Create an initial VPlan with preheader and entry blocks. Creates a - /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count. 
+ /// Create initial VPlan skeleton, having a VPBasicBlock for the pre-header + /// which contains SCEV expansions that need to happen before the CFG is + /// modified; a VPBasicBlock for the vector pre-header, followed by a region + /// for the vector loop, followed by the middle VPBasicBlock. static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -718,6 +718,11 @@ auto Plan = std::make_unique(Preheader, VecPreheader); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + // Create empty VPRegionBlock, to be filled during processing later. + auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/); + VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -303,6 +303,19 @@ // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG() { + // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the + // skeleton. 
+ VPRegionBlock *TheRegion = Plan.getVectorLoopRegion(); + Loop2Region[TheLoop] = TheRegion; + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + VPBasicBlock *VectorPreheaderVPBB = Plan.getEntry(); + BB2VPBB[ThePreheaderBB] = VectorPreheaderVPBB; + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + BB2VPBB[LoopExitBB] = cast(TheRegion->getSingleSuccessor()); + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that @@ -312,22 +325,17 @@ // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); - BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; - ThePreheaderVPBB->setName("vector.ph"); + VectorPreheaderVPBB->setName("vector.ph"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create region (and header block) for the outer loop, so that we can link - // PH->Region. - auto *TopRegion = cast(getOrCreateVPB(TheLoop->getHeader())); - ThePreheaderVPBB->setOneSuccessor(TopRegion); - TopRegion->getEntry()->setName("vector.body"); + // Create empty VPBB for header block of the top region and set its name. 
+ VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); + HeaderVPBB->setName("vector.body"); + HeaderVPBB->setParent(TheRegion); + TheRegion->setEntry(HeaderVPBB); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -343,7 +351,8 @@ if (isHeaderVPBB(VPBB)) { // BB is a loop header, set the predecessor for the region. assert(LI->getLoopFor(BB)->getHeader() == BB); - setRegionPredsFromBB(Region, BB); + if (TheRegion != Region) + setRegionPredsFromBB(Region, BB); } else setVPBBPredsFromBB(VPBB, BB); @@ -374,7 +383,10 @@ Successors[0] == Region ? Successors[1] : Successors[0]; Region->setExiting(VPBB); Region->setParent(ExitVPBB->getParent()); - Successors = {ExitVPBB}; + if (TheRegion != Region) + Successors = {ExitVPBB}; + else + continue; // For the latch, we need to set the successor for the region. VPB = Region; } @@ -382,21 +394,13 @@ // Link successors. if (Successors.size() == 1) VPB->setOneSuccessor(Successors[0]); - else if (Successors.size() == 2) + else { + assert(Successors.size() == 2); VPB->setTwoSuccessors(Successors[0], Successors[1]); + } } - // 2. Process outermost loop exit. We created an empty VPBB for the loop - // single exit BB during the RPO traversal of the loop body but Instructions - // weren't visited because it's not part of the the loop. - BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); - assert(LoopExitBB && "Loops with multiple exits are not supported."); - VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - // Loop exit was already set as successor of the loop exiting BB. - // We only set its predecessor VPBB now. - setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - - // 3. The whole CFG has been built at this point so all the input Values must + // 2. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. 
fixPhiNodes(); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: -; CHECK-NEXT: outer.header: { +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> @@ -39,9 +39,9 @@ ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): exit +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: exit: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: