diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8801,13 +8801,12 @@ VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), *PSE.getSE()); + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + Plan->getVectorLoopRegion()->setEntry(HeaderVPBB); + Plan->getVectorLoopRegion()->setExiting(LatchVPBB); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -8923,8 +8922,7 @@ // --------------------------------------------------------------------------- // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan, - RecipeBuilder, Range.Start); + adjustRecipesForReductions(LatchVPBB, Plan, RecipeBuilder, Range.Start); // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2550,8 +2550,10 @@ ~VPlan(); - /// Create an initial VPlan with preheader and entry blocks. Creates a - /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count. 
+ /// Create initial VPlan skeleton, having a VPBasicBlock for the pre-header + /// which contains SCEV expansions that need to happen before the CFG is + /// modified; a VPBasicBlock for the vector pre-header, followed by a region + /// for the vector loop, followed by the middle VPBasicBlock. static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -715,6 +715,11 @@ auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + // Create empty VPRegionBlock, to be filled during processing later. + auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/); + VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -165,8 +165,9 @@ return VPBB; bool RegionExists = Loop2Region.contains(LoopOfBB); - assert(RegionExists ^ isHeaderBB(BB, LoopOfBB) && - "region must exist or BB must be a loop header"); + assert(LoopOfBB == TheLoop || + (RegionExists ^ isHeaderBB(BB, LoopOfBB)) && + "region must exist or BB must be a loop header"); if (RegionExists) { VPBB->setParent(Loop2Region[LoopOfBB]); } else { @@ -298,6 +299,19 @@ // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG() { + // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the + // skeleton. 
+ VPRegionBlock *TheRegion = Plan.getVectorLoopRegion(); + Loop2Region[TheLoop] = TheRegion; + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + VPBasicBlock *VectorPreheaderVPBB = Plan.getEntry(); + BB2VPBB[ThePreheaderBB] = VectorPreheaderVPBB; + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + BB2VPBB[LoopExitBB] = cast<VPBasicBlock>(TheRegion->getSingleSuccessor()); + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that @@ -307,25 +321,17 @@ // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - // buildPlainCFG needs to be called after createInitialVPlan, which creates - // the initial skeleton (including the preheader VPBB). buildPlainCFG builds - // the CFG for the loop nest and hooks it up to the initial skeleton. - VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); - BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; - ThePreheaderVPBB->setName("vector.ph"); + VectorPreheaderVPBB->setName("vector.ph"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create region (and header block) for the outer loop, so that we can link - // PH->Region. + // Create empty VPBB for header block of the top region and set its name. 
VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); HeaderVPBB->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB->getParent()); + HeaderVPBB->setParent(TheRegion); + TheRegion->setEntry(HeaderVPBB); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -343,7 +349,10 @@ else { // BB is a loop header, set the predecessor for the region. assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree"); - setRegionPredsFromBB(Region, BB); + // Only nested regions need predecessors; the skeleton linked the top one. + assert(LI->getLoopFor(BB)->getHeader() == BB); + if (TheRegion != Region) + setRegionPredsFromBB(Region, BB); } // Set VPBB successors. We create empty VPBBs for successors if they don't @@ -372,21 +381,13 @@ } // For a latch we need to set the successor of the region rather than that // of VPBB and it should be set to the exit, i.e., non-header successor. - Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 : Successor0); + if (TheRegion != Region) + Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 + : Successor0); Region->setExiting(VPBB); } - // 2. Process outermost loop exit. We created an empty VPBB for the loop - // single exit BB during the RPO traversal of the loop body but Instructions - // weren't visited because it's not part of the loop. - BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); - assert(LoopExitBB && "Loops with multiple exits are not supported."); - VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - // Loop exit was already set as successor of the loop exiting BB. - // We only set its predecessor VPBB now. - setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - - // 3. The whole CFG has been built at this point so all the input Values must + // 2. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. 
fixPhiNodes(); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: -; CHECK-NEXT: outer.header: { +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> @@ -39,9 +39,9 @@ ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): exit +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: exit: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: