diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8777,10 +8777,8 @@ VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + Plan->getVectorLoopRegion()->setEntry(HeaderVPBB); + Plan->getVectorLoopRegion()->setExiting(LatchVPBB); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2561,8 +2561,11 @@ ~VPlan(); - /// Create an initial VPlan with preheader and entry blocks. Creates a - /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count. + /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping + /// original scalar pre-header) which contains SCEV expansions that need to + /// happen before the CFG is modified; a VPBasicBlock for the vector + /// pre-header, followed by a region for the vector loop, followed by the + /// middle VPBasicBlock. 
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -714,6 +714,11 @@ auto Plan = std::make_unique(Preheader, VecPreheader); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + // Create empty VPRegionBlock, to be filled during processing later. + auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/); + VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -173,21 +173,25 @@ // Get or create a region for the loop containing BB. Loop *LoopOfBB = LI->getLoopFor(BB); - if (!LoopOfBB) + if (!LoopOfBB || isHeaderBB(BB, TheLoop)) return VPBB; - VPRegionBlock *RegionOfBB = Loop2Region.lookup(LoopOfBB); - assert((RegionOfBB != nullptr) ^ isHeaderBB(BB, LoopOfBB) && + bool RegionExists = Loop2Region.contains(LoopOfBB); + assert(RegionExists ^ isHeaderBB(BB, LoopOfBB) && "region must exist or BB must be a loop header"); - if (RegionOfBB) { - VPBB->setParent(RegionOfBB); + if (RegionExists) { + assert(!isHeaderBB(BB, LoopOfBB) && + "a region to use only exists for non-header blocks"); + VPBB->setParent(Loop2Region[LoopOfBB]); } else { // If BB's loop is nested inside another loop within VPlan's scope, the // header of that enclosing loop was already visited and its region // constructed and recorded in Loop2Region. That region is now set as the // parent of VPBB's region. Otherwise it is set to null. 
- auto *RegionOfVPBB = new VPRegionBlock( - LoopOfBB->getHeader()->getName().str(), false /*isReplicator*/); + assert(isHeaderBB(BB, LoopOfBB) && + "new regions must be created for header blocks only"); + auto *RegionOfVPBB = + new VPRegionBlock(BB->getName().str(), false /*isReplicator*/); RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); RegionOfVPBB->setEntry(VPBB); Loop2Region[LoopOfBB] = RegionOfVPBB; @@ -314,6 +318,26 @@ // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG() { + // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the + // skeleton. These were created directly rather than via getOrCreateVPBB(), + // revisit them now to update BB2VPBB and Loop2Region. Note that header/entry + // and latch/exiting VPBBs of the top-level region have yet to be created. + VPRegionBlock *TheRegion = Plan.getVectorLoopRegion(); + Loop2Region[TheLoop] = TheRegion; + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + auto *VectorPreheaderVPBB = + cast(TheRegion->getSinglePredecessor()); + // ThePreheaderBB conceptually corresponds to both Plan.getPreheader() (which + // wraps the original preheader BB) and Plan.getEntry() (which represents the + // new vector preheader); here we're interested in setting BB2VPBB to the + // latter. + BB2VPBB[ThePreheaderBB] = VectorPreheaderVPBB; + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + BB2VPBB[LoopExitBB] = cast(TheRegion->getSingleSuccessor()); + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs.
Note that @@ -323,25 +347,15 @@ // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - // buildPlainCFG needs to be called after createInitialVPlan, which creates - // the initial skeleton (including the preheader VPBB). buildPlainCFG builds - // the CFG for the loop nest and hooks it up to the initial skeleton. - VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); - BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; - ThePreheaderVPBB->setName("vector.ph"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create region (and header block) for the outer loop, so that we can link - // PH->Region. + // Create empty VPBB for header block of the top region and set its name. VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); HeaderVPBB->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB->getParent()); + TheRegion->setEntry(HeaderVPBB); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -354,12 +368,14 @@ createVPInstructionsForVPBB(VPBB, BB); Loop *LoopForBB = LI->getLoopFor(BB); // Set VPBB predecessors in the same order as they are in the incoming BB. - if (!isHeaderBB(BB, LoopForBB)) + if (!isHeaderBB(BB, LoopForBB)) { setVPBBPredsFromBB(VPBB, BB); - else { - // BB is a loop header, set the predecessor for the region. + } else { + // BB is a loop header, set the predecessor for the region, except for the + // top region, whose predecessor was set when creating VPlan's skeleton. assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree"); - setRegionPredsFromBB(Region, BB); + if (TheRegion != Region) + setRegionPredsFromBB(Region, BB); } // Set VPBB successors. 
We create empty VPBBs for successors if they don't @@ -387,22 +403,16 @@ continue; } // For a latch we need to set the successor of the region rather than that - // of VPBB and it should be set to the exit, i.e., non-header successor. - Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 : Successor0); + // of VPBB and it should be set to the exit, i.e., non-header successor, + // except for the top region, whose successor was set when creating VPlan's + // skeleton. + if (TheRegion != Region) + Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 + : Successor0); Region->setExiting(VPBB); } - // 2. Process outermost loop exit. We created an empty VPBB for the loop - // single exit BB during the RPO traversal of the loop body but Instructions - // weren't visited because it's not part of the loop. - BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); - assert(LoopExitBB && "Loops with multiple exits are not supported."); - VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - // Loop exit was already set as successor of the loop exiting BB. - // We only set its predecessor VPBB now. - setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - - // 3. The whole CFG has been built at this point so all the input Values must + // 2. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. 
fixPhiNodes(); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: -; CHECK-NEXT: outer.header: { +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> @@ -39,9 +39,9 @@ ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): exit +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: exit: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -112,12 +112,12 @@ ] N1 [label = "vector.ph:\l" + - "Successor(s): for.body\l" + "Successor(s): vector loop\l" ] N1 -> N2 [ label="" lhead=cluster_N3] subgraph cluster_N3 { fontname=Courier - label="\ for.body" + label="\ vector loop" N2 [label = "vector.body:\l" + " WIDEN-PHI ir\<%indvars.iv\> = phi ir\<0\>, ir\<%indvars.iv.next\>\l" + @@ -133,7 +133,7 @@ } N2 -> N4 [ label="" ltail=cluster_N3] N4 [label = - "for.end:\l" + + "middle.block:\l" + "No successors\l" ] }