diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9118,31 +9118,6 @@ [this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); }, DeadInstructions, *PSE.getSE()); - // Update plan to be compatible with the inner loop vectorizer for - // code-generation. - VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion(); - VPBasicBlock *Preheader = LoopRegion->getEntryBasicBlock(); - VPBasicBlock *Exit = LoopRegion->getExitBasicBlock(); - VPBlockBase *Latch = Exit->getSinglePredecessor(); - VPBlockBase *Header = Preheader->getSingleSuccessor(); - - // 1. Move preheader block out of main vector loop. - Preheader->setParent(LoopRegion->getParent()); - VPBlockUtils::disconnectBlocks(Preheader, Header); - VPBlockUtils::connectBlocks(Preheader, LoopRegion); - Plan->setEntry(Preheader); - - // 2. Disconnect backedge and exit block. - VPBlockUtils::disconnectBlocks(Latch, Header); - VPBlockUtils::disconnectBlocks(Latch, Exit); - - // 3. Update entry and exit of main vector loop region. - LoopRegion->setEntry(Header); - LoopRegion->setExit(Latch); - - // 4. Remove exit block. - delete Exit; - addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(), true, true); return Plan; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2654,13 +2654,9 @@ /// Returns the VPRegionBlock of the vector loop. VPRegionBlock *getVectorLoopRegion() { - if (auto *R = dyn_cast(getEntry())) - return R; return cast(getEntry()->getSingleSuccessor()); } const VPRegionBlock *getVectorLoopRegion() const { - if (auto *R = dyn_cast(getEntry())) - return R; return cast(getEntry()->getSingleSuccessor()); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -265,19 +265,6 @@ auto &PredVPSuccessors = PredVPBB->getSuccessors(); BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; - // In outer loop vectorization scenario, the predecessor BBlock may not yet - // be visited(backedge). Mark the VPBasicBlock for fixup at the end of - // vectorization. We do not encounter this case in inner loop vectorization - // as we start out by building a loop skeleton with the vector loop header - // and latch blocks. As a result, we never enter this function for the - // header block in the non VPlan-native path. - if (!PredBB) { - assert(EnableVPlanNativePath && - "Unexpected null predecessor in non VPlan-native path"); - CFG.VPBBsToFix.push_back(PredVPBB); - continue; - } - assert(PredBB && "Predecessor basic-block not found building successor."); auto *PredBBTerminator = PredBB->getTerminator(); LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -57,9 +57,9 @@ // are introduced. VPDominatorTree VPDomTree; - /// Build plain CFG for TheLoop. Return a new VPRegionBlock (TopRegion) - /// enclosing the plain CFG. - VPRegionBlock *buildPlainCFG(); + /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected + /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG. + VPBasicBlock *buildPlainCFG(); public: VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -42,9 +42,6 @@ // Vectorization plan that we are working on. VPlan &Plan; - // Output Top Region. - VPRegionBlock *TopRegion = nullptr; - // Builder of the VPlan instruction-level representation. VPBuilder VPIRBuilder; @@ -59,6 +56,9 @@ // Hold phi node's that need to be fixed once the plain CFG has been built. SmallVector PhisToFix; + /// Maps loops in the original IR to their corresponding region. + DenseMap Loop2Region; + // Utility functions. void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); void fixPhiNodes(); @@ -73,8 +73,9 @@ PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) : TheLoop(Lp), LI(LI), Plan(P) {} - // Build the plain CFG and return its Top Region. - VPRegionBlock *buildPlainCFG(); + /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected + /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG. + VPBasicBlock *buildPlainCFG(); }; } // anonymous namespace @@ -83,8 +84,10 @@ void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { SmallVector VPBBPreds; // Collect VPBB predecessors. - for (BasicBlock *Pred : predecessors(BB)) - VPBBPreds.push_back(getOrCreateVPBB(Pred)); + for (BasicBlock *Pred : predecessors(BB)) { + auto *PredVPBB = getOrCreateVPBB(Pred); + VPBBPreds.push_back(PredVPBB); + } VPBB->setPredecessors(VPBBPreds); } @@ -114,11 +117,22 @@ // Retrieve existing VPBB. return BlockIt->second; + // Get or create a region for the loop containing BB. + Loop *CurrentLoop = LI->getLoopFor(BB); + VPRegionBlock *ParentR = nullptr; + if (CurrentLoop) { + auto Iter = Loop2Region.insert({CurrentLoop, nullptr}); + if (Iter.second) + Iter.first->second = new VPRegionBlock( + CurrentLoop->getHeader()->getName().str(), false /*isReplicator*/); + ParentR = Iter.first->second; + } + // Create new VPBB. LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); BB2VPBB[BB] = VPBB; - VPBB->setParent(TopRegion); + VPBB->setParent(ParentR); return VPBB; } @@ -237,11 +251,8 @@ } // Main interface to build the plain CFG. -VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { - // 1. Create the Top Region. It will be the parent of all VPBBs. - TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/); - - // 2. Scan the body of the loop in a topological order to visit each basic +VPBasicBlock *PlainCFGBuilder::buildPlainCFG() { + // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for // each BB and link it to its successor and predecessor VPBBs. Note that // predecessors must be set in the same order as they are in the incomming IR. @@ -261,9 +272,9 @@ } // Create empty VPBB for Loop H so that we can link PH->H. VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); + PreheaderVPBB->setOneSuccessor(HeaderVPBB); HeaderVPBB->setName("vector.body"); // Preheader's predecessors will be set during the loop RPO traversal below. - PreheaderVPBB->setOneSuccessor(HeaderVPBB); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -316,24 +327,53 @@ BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); assert(LoopExitBB && "Loops with multiple exits are not supported."); VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; - createVPInstructionsForVPBB(LoopExitVPBB, LoopExitBB); // Loop exit was already set as successor of the loop exiting BB. // We only set its predecessor VPBB now. setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - // 4. The whole CFG has been built at this point so all the input Values must + // 4. Fix up region blocks for loops. For each loop, + // * use the header block as entry to the corresponding region, + // * use the latch block as exit of the corresponding region, + // * set the region as successor of the loop pre-header, and + // * set the exit block as successor to the region. + SmallVector LoopWorkList; + LoopWorkList.push_back(TheLoop); + while (!LoopWorkList.empty()) { + Loop *L = LoopWorkList.pop_back_val(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + VPRegionBlock *Region = Loop2Region[L]; + VPBasicBlock *HeaderVPBB = getOrCreateVPBB(Header); + VPBasicBlock *LatchVPBB = getOrCreateVPBB(Latch); + + // Disconnect backedge and pre-header from header. + VPBasicBlock *HeaderPred = getOrCreateVPBB(L->getLoopPreheader()); + VPBlockUtils::disconnectBlocks(HeaderPred, HeaderVPBB); + VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPBB); + + Region->setParent(HeaderPred->getParent()); + Region->setEntry(HeaderVPBB); + VPBlockUtils::connectBlocks(HeaderPred, Region); + + // Disconnect exit block from latch, set exit and connect region to exit + // block. + VPBasicBlock *LatchSucc = getOrCreateVPBB(L->getExitBlock()); + VPBlockUtils::disconnectBlocks(LatchVPBB, LatchSucc); + Region->setExit(LatchVPBB); + VPBlockUtils::connectBlocks(Region, LatchSucc); + + // Queue sub-loops for processing. + LoopWorkList.append(L->begin(), L->end()); + } + // 5. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. fixPhiNodes(); - // 5. Final Top Region setup. Set outermost loop pre-header and single exit as - // Top Region entry and exit. - TopRegion->setEntry(PreheaderVPBB); - TopRegion->setExit(LoopExitVPBB); - return TopRegion; + return PreheaderVPBB; } -VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { +VPBasicBlock *VPlanHCFGBuilder::buildPlainCFG() { PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); return PCFGBuilder.buildPlainCFG(); } @@ -341,10 +381,11 @@ // Public interface to build a H-CFG. void VPlanHCFGBuilder::buildHierarchicalCFG() { // Build Top Region enclosing the plain CFG and set it as VPlan entry. - VPRegionBlock *TopRegion = buildPlainCFG(); - Plan.setEntry(TopRegion); + VPBasicBlock *EntryVPBB = buildPlainCFG(); + Plan.setEntry(EntryVPBB); LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); + auto *TopRegion = cast(EntryVPBB->getSingleSuccessor()); Verifier.verifyHierarchicalCFG(TopRegion); // Compute plain CFG dom tree for VPLInfo. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -24,15 +24,9 @@ GetIntOrFpInductionDescriptor, SmallPtrSetImpl &DeadInstructions, ScalarEvolution &SE) { - auto *TopRegion = cast(Plan->getEntry()); - ReversePostOrderTraversal RPOT(TopRegion->getEntry()); - - for (VPBlockBase *Base : RPOT) { - // Do not widen instructions in pre-header and exit blocks. - if (Base->getNumPredecessors() == 0 || Base->getNumSuccessors() == 0) - continue; - - VPBasicBlock *VPBB = Base->getEntryBasicBlock(); + ReversePostOrderTraversal> + RPOT(VPBlockRecursiveTraversalWrapper(Plan->getEntry())); + for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(RPOT)) { // Introduce each ingredient into VPlan. for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) { VPValue *VPV = Ingredient.getVPSingleValue(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -50,7 +50,7 @@ assert(VPB->getParent() == Region && "VPBlockBase has wrong parent"); // Check block's condition bit. - if (VPB->getNumSuccessors() > 1) + if (VPB->getNumSuccessors() > 1 || Region->getExitBasicBlock() == VPB) assert(VPB->getCondBit() && "Missing condition bit!"); else assert(!VPB->getCondBit() && "Unexpected condition bit!"); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -49,8 +49,10 @@ VPBasicBlock *VecBB = Entry->getSingleSuccessor()->getEntryBasicBlock(); EXPECT_EQ(7u, VecBB->size()); - EXPECT_EQ(2u, VecBB->getNumPredecessors()); - EXPECT_EQ(2u, VecBB->getNumSuccessors()); + EXPECT_EQ(0u, VecBB->getNumPredecessors()); + EXPECT_EQ(0u, VecBB->getNumSuccessors()); + EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB); + EXPECT_EQ(VecBB->getParent()->getExitBasicBlock(), VecBB); EXPECT_EQ(&*Plan, VecBB->getPlan()); auto Iter = VecBB->begin(); @@ -101,15 +103,15 @@ node [shape=rect, fontname=Courier, fontsize=30] edge [fontname=Courier, fontsize=30] compound=true - subgraph cluster_N0 { + N0 [label = + "entry:\l" + + "Successor(s): for.body\l" + ] + N0 -> N1 [ label="" lhead=cluster_N2] + subgraph cluster_N2 { fontname=Courier - label="\ TopRegion" + label="\ for.body" N1 [label = - "entry:\l" + - "Successor(s): vector.body\l" - ] - N1 -> N2 [ label=""] - N2 [label = "vector.body:\l" + " WIDEN-PHI ir\<%indvars.iv\> = phi ir\<0\>, ir\<%indvars.iv.next\>\l" + " EMIT ir\<%arr.idx\> = getelementptr ir\<%A\> ir\<%indvars.iv\>\l" + @@ -118,17 +120,16 @@ " EMIT store ir\<%res\> ir\<%arr.idx\>\l" + " EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\> ir\<1\>\l" + " EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\> ir\<%N\>\l" + - "Successor(s): vector.body, for.end\l" + + "No successors\l" + "CondBit: ir\<%exitcond\> (vector.body)\l" ] - N2 -> N2 [ label="T"] - N2 -> N3 [ label="F"] - N3 [label = - "for.end:\l" + - " EMIT ret\l" + - "No successors\l" - ] } + N1 -> N3 [ label="" ltail=cluster_N2] + N3 [label = + "for.end:\l" + + " EMIT ret\l" + + "No successors\l" + ] } )"; EXPECT_EQ(ExpectedStr, FullDump); @@ -176,8 +177,10 @@ VPBasicBlock *VecBB = Entry->getSingleSuccessor()->getEntryBasicBlock(); EXPECT_EQ(7u, VecBB->size()); - EXPECT_EQ(2u, VecBB->getNumPredecessors()); - EXPECT_EQ(2u, VecBB->getNumSuccessors()); + EXPECT_EQ(0u, VecBB->getNumPredecessors()); + EXPECT_EQ(0u, VecBB->getNumSuccessors()); + EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB); + EXPECT_EQ(VecBB->getParent()->getExitBasicBlock(), VecBB); auto Iter = VecBB->begin(); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "../lib/Transforms/Vectorize/VPlan.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Instruction.h" diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -76,8 +76,8 @@ auto Plan = std::make_unique(); VPlanHCFGBuilder HCFGBuilder(LI->getLoopFor(LoopHeader), LI.get(), *Plan); - VPRegionBlock *TopRegion = HCFGBuilder.buildPlainCFG(); - Plan->setEntry(TopRegion); + VPBasicBlock *EntryVPBB = HCFGBuilder.buildPlainCFG(); + Plan->setEntry(EntryVPBB); return Plan; } };