Index: lib/Transforms/Vectorize/VPlan.h =================================================================== --- lib/Transforms/Vectorize/VPlan.h +++ lib/Transforms/Vectorize/VPlan.h @@ -58,6 +58,7 @@ class Value; class VPBasicBlock; class VPBlockBase; +class VPDominatorTree; class VPLoop; typedef LoopInfoBase VPLoopInfo; class VPRegionBlock; @@ -515,6 +516,14 @@ appendPredecessor(Pred); } + // Replace \p Old predecessor block with \p New block. If \p Old is a + // predecessor multiple times, only the first instance is replaced. + void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) { + auto PredIt = std::find(Predecessors.begin(), Predecessors.end(), Old); + assert(PredIt != Predecessors.end() && "Predecessor not found!"); + Predecessors[PredIt - Predecessors.begin()] = New; + } + /// The method which generates the output IR that correspond to this /// VPBlockBase, thereby "executing" the VPlan. virtual void execute(struct VPTransformState *State) = 0; @@ -1378,23 +1387,46 @@ /// Class that provides utilities for VPBlockBases in VPlan. class VPBlockUtils { +private: + /// Move successors and condition bit from VPBlockBase \p From to + /// VPBlockBase \p To. + static void moveSuccessors(VPBlockBase *From, VPBlockBase *To) { + auto &Successors = From->getSuccessors(); + for (auto &Succ : Successors) { + Succ->replacePredecessor(From, To); + To->appendSuccessor(Succ); + } + + // Move condition bit from From to To. + To->setCondBit(From->getCondBit()); + From->setCondBit(nullptr); + + // Remove successors from From. + Successors.clear(); + } + public: VPBlockUtils() = delete; /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr - /// has more than one successor, its conditional bit is propagated to \p + /// has more than one successor, its condition bit is propagated to \p /// NewBlock. \p NewBlock must have neither successors nor predecessors. static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { assert(NewBlock->getSuccessors().empty() && "Can't insert new block with successors."); - // TODO: move successors from BlockPtr to NewBlock when this functionality - // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr - // already has successors. - BlockPtr->setOneSuccessor(NewBlock); - NewBlock->setPredecessors({BlockPtr}); - NewBlock->setParent(BlockPtr->getParent()); + + // Move successors, set parent and connect NewBlock. + moveSuccessors(BlockPtr, NewBlock); + VPRegionBlock *ParentRegion = BlockPtr->getParent(); + NewBlock->setParent(ParentRegion); + connectBlocks(BlockPtr, NewBlock); + + // If BlockPtr is parent region's exit, set NewBlock as parent region's + // exit. + if (ParentRegion && ParentRegion->getExit() == BlockPtr) + ParentRegion->setExit(NewBlock); } /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p @@ -1436,6 +1468,12 @@ From->removeSuccessor(To); To->removePredecessor(From); } + + /// Split \p Block by turning Block->Successors into Block->NewSuccessor-> + /// Successors and updates VPLoopInfo, DomTree and PostDomTree accordingly. + /// \return the new VPBasicBlock created as a result of the split. + static VPBasicBlock *splitBlock(VPBlockBase *Block, VPLoopInfo *VPLInfo, + VPDominatorTree &DomTree); }; } // end namespace llvm Index: lib/Transforms/Vectorize/VPlan.cpp =================================================================== --- lib/Transforms/Vectorize/VPlan.cpp +++ lib/Transforms/Vectorize/VPlan.cpp @@ -600,5 +600,29 @@ O << "\\l\""; } +VPBasicBlock *VPBlockUtils::splitBlock(VPBlockBase *Block, VPLoopInfo *VPLInfo, + VPDominatorTree &DomTree) { + VPBasicBlock *NewBlock = new VPBasicBlock(Block->getName() + ".split"); + insertBlockAfter(NewBlock, Block); + + // 1. Add NewBlock to VPLoopInfo. + if (VPLoop *Loop = VPLInfo->getLoopFor(Block)) { + Loop->addBasicBlockToLoop(NewBlock, *VPLInfo); + } + + // 2. Update domination information. + VPDomTreeNode *BlockDT = DomTree.getNode(Block); + SmallVector BlockDTChildren(BlockDT->begin(), + BlockDT->end()); + // Block is NewBlock's idom. + VPDomTreeNode *NewBlockDT = DomTree.addNewBlock(NewBlock, Block /*IDom*/); + + // NewBlock dominates all the other nodes dominated by Block. + for (VPDomTreeNode *Child : BlockDTChildren) + DomTree.changeImmediateDominator(Child, NewBlockDT); + + return NewBlock; +} + using VPDomTree = DomTreeBase; template void DomTreeBuilder::Calculate(VPDomTree &DT); Index: lib/Transforms/Vectorize/VPlanHCFGBuilder.h =================================================================== --- lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -32,6 +32,7 @@ namespace llvm { class Loop; +class VPLoop; /// Main class to build the VPlan H-CFG for an incoming IR. class VPlanHCFGBuilder { @@ -53,6 +54,11 @@ // are introduced. VPDominatorTree VPDomTree; + // Methods to simplify the plain CFG. + void simplifyPlainCFG(); + void splitLoopsPreheader(VPLoop *VPLp); + void splitLoopsExit(VPLoop *VPLp); + public: VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) : TheLoop(Lp), LI(LI), Plan(P) {} Index: lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp =================================================================== --- lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -325,6 +325,74 @@ return TopRegion; } +// Split the pre-header VPBasicBlocks of \p VPLp and all it nested VPLoops. A +// pre-header is split if it meets one of the following conditions: +// 1. It's the loop header of another loop. +// 2. It has multiple predecessors (it's a potential exit of another region). +// +// TODO: This function is currently splitting VPBBs without taking into account +// VPInstructions within them. In other words, VPInstructions are kept in the +// original VPBB. Eventually, we may want to do a smarter splitting and +// consciously move those VPInstructions that should better be in the VPBB +// resulting from the split. +void VPlanHCFGBuilder::splitLoopsPreheader(VPLoop *VPLp) { + VPLoopInfo *VPLInfo = Plan.getVPLoopInfo(); + VPBlockBase *PH = VPLp->getLoopPreheader(); + assert(PH && PH->getNumSuccessors() == 1 && + "Expected loop pre-header with a single successor!"); + + // Split loop PH. + if (PH->getNumPredecessors() > 1 /*2*/ || VPLInfo->isLoopHeader(PH) /*1*/) + VPBlockUtils::splitBlock(PH, VPLInfo, VPDomTree); + + // Recurse inside nested loops. + for (auto *VPSubLp : VPLp->getSubLoops()) + splitLoopsPreheader(VPSubLp); +} + +// Split the exit VPBasicBlocks of \p VPLp and all it nested VPLoops. An +// exit is split if it meets one of the following conditions: +// 1. It's the loop pre-header of another loop. +// 2. It has multiple successors (it's a potential entry of another region). +// This case covers exits that are also loop latches of another loops. +// +// TODO: This function is currently splitting VPBBs without taking into account +// VPInstructions within them. In other words, VPInstructions are kept in the +// original VPBB. Eventually, we may want to do a smarter splitting and +// consciously move those VPInstructions that should better be in the VPBB +// resulting from the split. +void VPlanHCFGBuilder::splitLoopsExit(VPLoop *VPLp) { + VPLoopInfo *VPLInfo = Plan.getVPLoopInfo(); + VPBlockBase *Exit = VPLp->getUniqueExitBlock(); + assert(Exit && "Only single-exit loops expected!"); + + // Split loop exit. + VPBlockBase *PotentialH = Exit->getSingleSuccessor(); + if (!PotentialH /*2*/ || VPLInfo->isLoopHeader(PotentialH) /*1*/) + VPBlockUtils::splitBlock(Exit, VPLInfo, VPDomTree); + + // Recurse inside nested loops. + for (auto *VPSubLp : VPLp->getSubLoops()) + splitLoopsExit(VPSubLp); +} + +// Main function that simplifies the plain CFG to be able to introduce +// VPLoopRegions and non-loop VPRegionBlocks during the hierarchical CFG +// construction. It mainly splits VPBasicBlocks that are suitable for +// simultaneously being entry and exit blocks of two regions. +void VPlanHCFGBuilder::simplifyPlainCFG() { + assert(isa(Plan.getEntry()) && + "VPlan entry is not a VPRegionBlock"); + VPLoopInfo *VPLInfo = Plan.getVPLoopInfo(); + + assert(std::distance(VPLInfo->begin(), VPLInfo->end()) == 1 && + "Expected only 1 top-level loop."); + VPLoop *TopLoop = *VPLInfo->begin(); + + splitLoopsPreheader(TopLoop); + splitLoopsExit(TopLoop); +} + VPRegionBlock *VPlanHCFGBuilder::buildPlainCFG() { PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); return PCFGBuilder.buildPlainCFG(); @@ -350,4 +418,16 @@ Plan.setVPLoopInfo(VPLInfo); LLVM_DEBUG(dbgs() << "VPLoop Info After buildPlainCFG:\n"; VPLInfo->print(dbgs())); + + // Prepare/simplify CFG for hierarchical CFG construction. + simplifyPlainCFG(); + + LLVM_DEBUG(Plan.setName("HCFGBuilder: After simplifyPlainCFG\n"); + dbgs() << Plan); + LLVM_DEBUG(dbgs() << "Dominator Tree After simplifyPlainCFG\n"; + VPDomTree.print(dbgs())); + LLVM_DEBUG(dbgs() << "VPLoop Info After simplifyPlainCFG:\n"; + VPLInfo->print(dbgs())); + + Verifier.verifyHierarchicalCFG(TopRegion); } Index: unittests/Transforms/Vectorize/VPlanHCFGTest.cpp =================================================================== --- unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -17,7 +17,7 @@ class VPlanHCFGTest : public VPlanTestBase {}; -TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) { +TEST_F(VPlanHCFGTest, testBuildPlainCFGInnerLoop) { const char *ModuleString = "define void @f(i32* %A, i64 %N) {\n" "entry:\n" @@ -39,7 +39,7 @@ Function *F = M.getFunction("f"); BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor(); - auto Plan = buildHCFG(LoopHeader); + auto Plan = buildPlainCFG(LoopHeader); VPBasicBlock *Entry = Plan->getEntry()->getEntryBasicBlock(); EXPECT_NE(nullptr, Entry->getSingleSuccessor()); @@ -154,5 +154,68 @@ EXPECT_EQ(VecBB->end(), Iter); } +TEST_F(VPlanHCFGTest, testSimplifyPlainCFGOuterLoop) { + const char *ModuleString = + "define void @outer(i32* %a, i32* %b) {\n" + "entry:\n" + " br label %outer.body\n" + "outer.body:\n" // Outer loop body + inner loop ph. + " %iv35 = phi i64 [ 0, %entry ], [ %iv.next36, %outer.inc ]\n" + " %0 = mul nsw i64 %iv35, 64\n" + " br label %inner.body\n" + "inner.body:\n" + " %iv = phi i64 [ 0, %outer.body ], [ %iv.next, %inner.body ]\n" + " %1 = add nsw i64 %iv, %0\n" + " %idx = getelementptr inbounds i32, i32* %b, i64 %1\n" + " %2 = load i32, i32* %idx, align 4\n" + " %idx12 = getelementptr inbounds i32, i32* %a, i64 %1\n" + " store i32 %2, i32* %idx12, align 4\n" + " %iv.next = add nuw nsw i64 %iv, 1\n" + " %exitcond = icmp eq i64 %iv.next, 64\n" + " br i1 %exitcond, label %outer.inc, label %inner.body\n" + "outer.inc:\n" // Outer loop latch + inner loop exit. + " %iv.next36 = add nuw nsw i64 %iv35, 1\n" + " %exitcond39 = icmp eq i64 %iv.next36, 64\n" + " br i1 %exitcond39, label %for.end15, label %outer.body\n" + "for.end15:\n" + " ret void\n" + "}\n"; + + Module &M = parseModule(ModuleString); + + Function *F = M.getFunction("outer"); + BasicBlock *LoopHeader = F->getEntryBlock().getSingleSuccessor(); + auto Plan = buildHCFG(LoopHeader); + + VPBasicBlock *OuterPH = Plan->getEntry()->getEntryBasicBlock(); + EXPECT_NE(OuterPH, nullptr); + EXPECT_EQ(OuterPH->getCondBit(), nullptr); + + VPBlockBase *OuterBody = OuterPH->getSingleSuccessor(); + EXPECT_NE(OuterBody, nullptr); + EXPECT_EQ(OuterBody->getCondBit(), nullptr); + + // Simplify plain CFG should have introduced an independent inner loop PH. + VPBasicBlock *InnerPH = OuterBody->getSingleSuccessor()->getEntryBasicBlock(); + EXPECT_NE(InnerPH, nullptr); + EXPECT_EQ(InnerPH->getCondBit(), nullptr); + + VPBlockBase *InnerBody = InnerPH->getSingleSuccessor(); + EXPECT_NE(InnerBody, nullptr); + EXPECT_NE(InnerBody->getCondBit(), nullptr); + + VPBlockBase *InnerExit = InnerBody->getSuccessors()[0]; + EXPECT_NE(InnerExit, nullptr); + EXPECT_EQ(InnerExit->getCondBit(), nullptr); + + // Simplify plain CFG should have introduced an independent inner loop exit. + VPBlockBase *OuterLatch = InnerExit->getSingleHierarchicalSuccessor(); + EXPECT_NE(OuterLatch, nullptr); + EXPECT_NE(OuterLatch->getCondBit(), nullptr); + + VPBlockBase *OuterExit = OuterLatch->getSuccessors()[0]; + EXPECT_NE(OuterExit, nullptr); + EXPECT_EQ(OuterExit->getCondBit(), nullptr); +} } // namespace } // namespace llvm