diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8777,10 +8777,8 @@ VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body"); VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); - VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry()); - VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + Plan->getVectorLoopRegion()->setEntry(HeaderVPBB); + Plan->getVectorLoopRegion()->setExiting(LatchVPBB); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2561,8 +2561,11 @@ ~VPlan(); - /// Create an initial VPlan with preheader and entry blocks. Creates a - /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count. + /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping + /// original scalar pre-header) which contains SCEV expansions that need to + /// happen before the CFG is modified; a VPBasicBlock for the vector + /// pre-header, followed by a region for the vector loop, followed by the + /// middle VPBasicBlock. 
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -714,6 +714,11 @@ auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader); Plan->TripCount = vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); + // Create empty VPRegionBlock, to be filled during processing later. + auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/); + VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); + VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -63,7 +63,7 @@ void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB); void fixPhiNodes(); - VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); + VPBasicBlock *getOrCreateVPBB(BasicBlock *BB, StringRef Name = ""); #ifndef NDEBUG bool isExternalDef(Value *Val); #endif @@ -160,15 +160,17 @@ // corresponding to the containing loop or retrieve an existing one if it was // already created. If no region exists yet for the loop containing \p BB, a new // one is created. -VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { +VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB, StringRef Name) { if (auto *VPBB = BB2VPBB.lookup(BB)) { // Retrieve existing VPBB. return VPBB; } // Create new VPBB. 
- LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); - VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); + if (Name.size() == 0) + Name = BB->getName(); + LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n"); + VPBasicBlock *VPBB = new VPBasicBlock(Name); BB2VPBB[BB] = VPBB; // Get or create a region for the loop containing BB. @@ -176,22 +178,25 @@ if (!LoopOfBB) return VPBB; - VPRegionBlock *RegionOfBB = Loop2Region.lookup(LoopOfBB); - assert((RegionOfBB != nullptr) ^ isHeaderBB(BB, LoopOfBB) && - "region must exist or BB must be a loop header"); - if (RegionOfBB) { - VPBB->setParent(RegionOfBB); + auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB); + if (!isHeaderBB(BB, LoopOfBB)) { + assert(RegionOfVPBB && + "Region should have been created by visiting header earlier"); + VPBB->setParent(RegionOfVPBB); + return VPBB; + } + + assert(!RegionOfVPBB && "..."); + // Handle a header - take care of its Region. + if (LoopOfBB == TheLoop) { + RegionOfVPBB = Plan.getVectorLoopRegion(); } else { - // If BB's loop is nested inside another loop within VPlan's scope, the - // header of that enclosing loop was already visited and its region - // constructed and recorded in Loop2Region. That region is now set as the - // parent of VPBB's region. Otherwise it is set to null. - auto *RegionOfVPBB = new VPRegionBlock( - LoopOfBB->getHeader()->getName().str(), false /*isReplicator*/); + RegionOfVPBB = + new VPRegionBlock(BB->getName().str(), false /*isReplicator*/); RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); - RegionOfVPBB->setEntry(VPBB); - Loop2Region[LoopOfBB] = RegionOfVPBB; } + RegionOfVPBB->setEntry(VPBB); + Loop2Region[LoopOfBB] = RegionOfVPBB; return VPBB; } @@ -314,6 +319,25 @@ // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG() { + // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the + // skeleton. 
These were created directly rather than via getOrCreateVPBB(), + // revisit them now to update BB2VPBB and Loop2Region. Note that header/entry + // and latch/exiting VPBBs of top-level region have yet to be created. + VPRegionBlock *TheRegion = Plan.getVectorLoopRegion(); + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + auto *VectorPreheaderVPBB = + cast<VPBasicBlock>(TheRegion->getSinglePredecessor()); + // ThePreheaderBB conceptually corresponds to both Plan.getPreheader() (which + // wraps the original preheader BB) and Plan.getEntry() (which represents the + // new vector preheader); here we're interested in setting BB2VPBB to the + // latter. + BB2VPBB[ThePreheaderBB] = VectorPreheaderVPBB; + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + BB2VPBB[LoopExitBB] = cast<VPBasicBlock>(TheRegion->getSingleSuccessor()); + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that @@ -323,25 +347,13 @@ // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - // buildPlainCFG needs to be called after createInitialVPlan, which creates - // the initial skeleton (including the preheader VPBB). buildPlainCFG builds - // the CFG for the loop nest and hooks it up to the initial skeleton. 
- VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); - BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; - ThePreheaderVPBB->setName("vector.ph"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create region (and header block) for the outer loop, so that we can link - // PH->Region. - VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); - HeaderVPBB->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB->getParent()); + // Create empty VPBB for header block of the top region and set its name. + getOrCreateVPBB(TheLoop->getHeader(), "vector.body"); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -354,12 +366,14 @@ createVPInstructionsForVPBB(VPBB, BB); Loop *LoopForBB = LI->getLoopFor(BB); // Set VPBB predecessors in the same order as they are in the incoming BB. - if (!isHeaderBB(BB, LoopForBB)) + if (!isHeaderBB(BB, LoopForBB)) { setVPBBPredsFromBB(VPBB, BB); - else { - // BB is a loop header, set the predecessor for the region. + } else { + // BB is a loop header, set the predecessor for the region, except for the + // top region, whose predecessor was set when creating VPlan's skeleton. assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree"); - setRegionPredsFromBB(Region, BB); + if (TheRegion != Region) + setRegionPredsFromBB(Region, BB); } // Set VPBB successors. We create empty VPBBs for successors if they don't @@ -387,22 +401,16 @@ continue; } // For a latch we need to set the successor of the region rather than that - // of VPBB and it should be set to the exit, i.e., non-header successor. - Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 : Successor0); + // of VPBB and it should be set to the exit, i.e., non-header successor, + // except for the top region, whose successor was set when creating VPlan's + // skeleton. + if (TheRegion != Region) + Region->setOneSuccessor(isHeaderVPBB(Successor0) ? 
Successor1 + : Successor0); Region->setExiting(VPBB); } - // 2. Process outermost loop exit. We created an empty VPBB for the loop - // single exit BB during the RPO traversal of the loop body but Instructions - // weren't visited because it's not part of the loop. - BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); - assert(LoopExitBB && "Loops with multiple exits are not supported."); - VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - // Loop exit was already set as successor of the loop exiting BB. - // We only set its predecessor VPBB now. - setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - - // 3. The whole CFG has been built at this point so all the input Values must + // 2. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. fixPhiNodes(); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -11,9 +11,9 @@ ; CHECK-NEXT: ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: -; CHECK-NEXT: outer.header: { +; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> @@ -39,9 +39,9 @@ ; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> ; CHECK-NEXT: No successors ; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): exit +; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: -; CHECK-NEXT: exit: +; CHECK-NEXT: middle.block: ; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp 
b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -112,12 +112,12 @@ ] N1 [label = "vector.ph:\l" + - "Successor(s): for.body\l" + "Successor(s): vector loop\l" ] N1 -> N2 [ label="" lhead=cluster_N3] subgraph cluster_N3 { fontname=Courier - label="\ for.body" + label="\ vector loop" N2 [label = "vector.body:\l" + " WIDEN-PHI ir\<%indvars.iv\> = phi ir\<0\>, ir\<%indvars.iv.next\>\l" + @@ -133,7 +133,7 @@ } N2 -> N4 [ label="" ltail=cluster_N3] N4 [label = - "for.end:\l" + + "middle.block:\l" + "No successors\l" ] }