diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8883,15 +8883,13 @@ VPlanPtr Plan = VPlan::createInitialVPlan( createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop), *PSE.getSE()); + VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + Plan->getVectorLoopRegion()->setEntry(HeaderVPBB); + Plan->getVectorLoopRegion()->setExiting(LatchVPBB); - // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split // it up into different VPlans. bool IVUpdateMayOverflow = false; @@ -8996,8 +8994,7 @@ // --------------------------------------------------------------------------- // Adjust the recipes for any inloop reductions. 
- adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan, - RecipeBuilder, Range.Start); + adjustRecipesForReductions(LatchVPBB, Plan, RecipeBuilder, Range.Start); // Interleave memory: for each Interleave Group we marked earlier as relevant // for this VPlan, replace the Recipes widening its memory instructions with a diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -718,6 +718,10 @@ auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/); + VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -83,8 +83,13 @@ void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { SmallVector<VPBlockBase *, 8> VPBBPreds; // Collect VPBB predecessors. - for (BasicBlock *Pred : predecessors(BB)) + for (BasicBlock *Pred : predecessors(BB)) { + // Ignore the pre-header, as it will be created and connected as part of the + // initial skeleton construction. + if (Pred == TheLoop->getLoopPreheader()) + continue; VPBBPreds.push_back(getOrCreateVPBB(Pred)); + } VPBB->setPredecessors(VPBBPreds); } @@ -254,6 +259,18 @@ // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG() { + // Reuse the top-level region, preheader and exit VPBBs from the skeleton. 
+ Loop2Region[TheLoop] = Plan.getVectorLoopRegion(); + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); + BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + BB2VPBB[LoopExitBB] = + cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that @@ -263,11 +280,6 @@ // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); - BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; ThePreheaderVPBB->setName("vector.ph"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) @@ -277,7 +289,6 @@ // Create empty VPBB for Loop H so that we can link PH->H. VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); HeaderVPBB->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -321,16 +332,6 @@ setVPBBPredsFromBB(VPBB, BB); } - // 2. Process outermost loop exit. We created an empty VPBB for the loop - // single exit BB during the RPO traversal of the loop body but Instructions - // weren't visited because it's not part of the the loop. 
- BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); - assert(LoopExitBB && "Loops with multiple exits are not supported."); - VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - // Loop exit was already set as successor of the loop exiting BB. - // We only set its predecessor VPBB now. - setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - // 3. Fix up region blocks for loops. For each loop, // * use the header block as entry to the corresponding region, // * use the latch block as exit of the corresponding region, @@ -350,19 +351,29 @@ // Disconnect backedge and pre-header from header. VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(L->getLoopPreheader()); - VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); VPBlockUtils::disconnectBlocks(ExitingVPBB, HeaderVPBB); - - Region->setParent(PreheaderVPBB->getParent()); + if (L != TheLoop) { + // Disconnect preheader from Header and update parent. This is only needed + // when the region was created by the builder (and is not the top-level + // region which was created as part of the skeleton). + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); + Region->setParent(PreheaderVPBB->getParent()); + VPBlockUtils::connectBlocks(PreheaderVPBB, Region); + } Region->setEntry(HeaderVPBB); - VPBlockUtils::connectBlocks(PreheaderVPBB, Region); // Disconnect exit block from exiting (=latch) block, set exiting block and // connect region to exit block. VPBasicBlock *ExitVPBB = getOrCreateVPBB(L->getExitBlock()); - VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB); + if (L != TheLoop) { + // Disconnect ExitingVPBB from ExitVPBB. This is only needed when the + // region was created by the builder (and is not the top-level region + // which was created as part of the skeleton). 
+ VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB); + VPBlockUtils::connectBlocks(Region, ExitVPBB); + } else + ExitingVPBB->getSuccessors().clear(); Region->setExiting(ExitingVPBB); - VPBlockUtils::connectBlocks(Region, ExitVPBB); // Queue sub-loops for processing. LoopWorkList.append(L->begin(), L->end()); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: -; CHECK-NEXT: outer.header: { +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> @@ -39,9 +39,9 @@ ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): exit +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: exit: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: