diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -585,6 +585,8 @@ /// This VPBlockBase must have no successors. void setOneSuccessor(VPBlockBase *Successor) { assert(Successors.empty() && "Setting one successor when others exist."); + assert(Successor->getParent() == getParent() && + "connected blocks must have the same parent"); appendSuccessor(Successor); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -61,6 +61,7 @@ // Utility functions. void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); + void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB); void fixPhiNodes(); VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); #ifndef NDEBUG @@ -81,11 +82,25 @@ // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB // must have no predecessors. void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { - SmallVector VPBBPreds; + auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * { + auto *SinglePred = BB->getSinglePredecessor(); + if (!SinglePred || LI->getLoopFor(SinglePred) == LI->getLoopFor(BB)) + return nullptr; + // can assert SinglePred is the latch of its loop. + return SinglePred; + }; + if (auto *LatchBB = GetLatchOfExit(BB)) { + auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent(); + assert(VPBB == cast(PredRegion->getSingleSuccessor()) && + "successor must already be set for PredRegion; it must have VPBB " + "as single successor"); + VPBB->setPredecessors({PredRegion}); + return; + } // Collect VPBB predecessors. + SmallVector VPBBPreds; for (BasicBlock *Pred : predecessors(BB)) VPBBPreds.push_back(getOrCreateVPBB(Pred)); - VPBB->setPredecessors(VPBBPreds); } @@ -93,6 +108,13 @@ return L && BB == L->getHeader(); } +void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region, + BasicBlock *BB) { + // BB is a loop header block. Connect the region to the loop preheader. + Loop *LoopOfBB = LI->getLoopFor(BB); + Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())}); +} + // Add operands to VPInstructions representing phi nodes from the input IR. void PlainCFGBuilder::fixPhiNodes() { for (auto *Phi : PhisToFix) { @@ -126,32 +148,42 @@ } } +static bool isHeaderVPBB(VPBasicBlock *VPBB) { + return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB; +} + // Create a new empty VPBasicBlock for an incoming BasicBlock in the region // corresponding to the containing loop or retrieve an existing one if it was // already created. If no region exists yet for the loop containing \p BB, a new // one is created. VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { - auto BlockIt = BB2VPBB.find(BB); - if (BlockIt != BB2VPBB.end()) + if (auto *VPBB = BB2VPBB.lookup(BB)) { // Retrieve existing VPBB. - return BlockIt->second; - - // Get or create a region for the loop containing BB. - Loop *CurrentLoop = LI->getLoopFor(BB); - VPRegionBlock *ParentR = nullptr; - if (CurrentLoop) { - auto Iter = Loop2Region.insert({CurrentLoop, nullptr}); - if (Iter.second) - Iter.first->second = new VPRegionBlock( - CurrentLoop->getHeader()->getName().str(), false /*isReplicator*/); - ParentR = Iter.first->second; + return VPBB; } // Create new VPBB. LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); BB2VPBB[BB] = VPBB; - VPBB->setParent(ParentR); + + // Get or create a region for the loop containing BB. + Loop *LoopOfBB = LI->getLoopFor(BB); + if (!LoopOfBB) + return VPBB; + + bool RegionExists = Loop2Region.contains(LoopOfBB); + assert(RegionExists ^ isHeaderBB(BB, LoopOfBB) && + "region must exist or BB must be a loop header"); + if (RegionExists) { + VPBB->setParent(Loop2Region[LoopOfBB]); + } else { + auto *RegionOfVPBB = new VPRegionBlock( + LoopOfBB->getHeader()->getName().str(), false /*isReplicator*/); + RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); + RegionOfVPBB->setEntry(VPBB); + Loop2Region[LoopOfBB] = RegionOfVPBB; + } return VPBB; } @@ -286,6 +318,9 @@ BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && "Unexpected loop preheader"); + // buildPlainCFG needs to be called after createInitialVPlan, which creates + // the initial skeleton (including the preheader VPBB). buildPlainCFG builds + // the CFG for the loop nest and hooks it up to the initial skeleton. VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; ThePreheaderVPBB->setName("vector.ph"); @@ -294,10 +329,11 @@ continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create empty VPBB for Loop H so that we can link PH->H. + // Create region (and header block) for the outer loop, so that we can link + // PH->Region. VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); HeaderVPBB->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB); + ThePreheaderVPBB->setOneSuccessor(HeaderVPBB->getParent()); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -306,39 +342,46 @@ // Create or retrieve the VPBasicBlock for this BB and create its // VPInstructions. VPBasicBlock *VPBB = getOrCreateVPBB(BB); + VPRegionBlock *Region = VPBB->getParent(); createVPInstructionsForVPBB(VPBB, BB); + Loop *LoopForBB = LI->getLoopFor(BB); + // Set VPBB predecessors in the same order as they are in the incoming BB. + if (!isHeaderBB(BB, LoopForBB)) + setVPBBPredsFromBB(VPBB, BB); + else { + // BB is a loop header, set the predecessor for the region. + assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree"); + setRegionPredsFromBB(Region, BB); + } // Set VPBB successors. We create empty VPBBs for successors if they don't // exist already. Recipes will be created when the successor is visited // during the RPO traversal. - Instruction *TI = BB->getTerminator(); - assert(TI && "Terminator expected."); - unsigned NumSuccs = TI->getNumSuccessors(); - + auto *BI = cast(BB->getTerminator()); + unsigned NumSuccs = succ_size(BB); if (NumSuccs == 1) { - VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0)); - assert(SuccVPBB && "VPBB Successor not found."); - VPBB->setOneSuccessor(SuccVPBB); - } else if (NumSuccs == 2) { - VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0)); - assert(SuccVPBB0 && "Successor 0 not found."); - VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1)); - assert(SuccVPBB1 && "Successor 1 not found."); - - // Get VPBB's condition bit. - assert(isa(TI) && "Unsupported terminator!"); - // Look up the branch condition to get the corresponding VPValue - // representing the condition bit in VPlan (which may be in another VPBB). - assert(IRDef2VPValue.count(cast(TI)->getCondition()) && - "Missing condition bit in IRDef2VPValue!"); - - // Link successors. - VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1); - } else - llvm_unreachable("Number of successors not supported."); - - // Set VPBB predecessors in the same order as they are in the incoming BB. - setVPBBPredsFromBB(VPBB, BB); + auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor()); + VPBB->setOneSuccessor(isHeaderVPBB(Successor) + ? Successor->getParent() + : static_cast(Successor)); + continue; + } + assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() && + "block must have conditional branch with 2 successors"); + // Look up the branch condition to get the corresponding VPValue + // representing the condition bit in VPlan (which may be in another VPBB). + assert(IRDef2VPValue.contains(BI->getCondition()) && + "Missing condition bit in IRDef2VPValue!"); + VPBasicBlock *Successor0 = getOrCreateVPBB(BI->getSuccessor(0)); + VPBasicBlock *Successor1 = getOrCreateVPBB(BI->getSuccessor(1)); + if (!LoopForBB || BB != LoopForBB->getLoopLatch()) { + VPBB->setTwoSuccessors(Successor0, Successor1); + continue; + } + // For a latch we need to set the successor of the region rather than that + // of VPBB and it should be set to the exit, i.e., non-header successor. + Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 : Successor0); + Region->setExiting(VPBB); } // 2. Process outermost loop exit. We created an empty VPBB for the loop @@ -351,43 +394,7 @@ // We only set its predecessor VPBB now. setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - // 3. Fix up region blocks for loops. For each loop, - // * use the header block as entry to the corresponding region, - // * use the latch block as exit of the corresponding region, - // * set the region as successor of the loop pre-header, and - // * set the exit block as successor to the region. - SmallVector LoopWorkList; - LoopWorkList.push_back(TheLoop); - while (!LoopWorkList.empty()) { - Loop *L = LoopWorkList.pop_back_val(); - BasicBlock *Header = L->getHeader(); - BasicBlock *Exiting = L->getLoopLatch(); - assert(Exiting == L->getExitingBlock() && - "Latch must be the only exiting block"); - VPRegionBlock *Region = Loop2Region[L]; - VPBasicBlock *HeaderVPBB = getOrCreateVPBB(Header); - VPBasicBlock *ExitingVPBB = getOrCreateVPBB(Exiting); - - // Disconnect backedge and pre-header from header. - VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(L->getLoopPreheader()); - VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); - VPBlockUtils::disconnectBlocks(ExitingVPBB, HeaderVPBB); - - Region->setParent(PreheaderVPBB->getParent()); - Region->setEntry(HeaderVPBB); - VPBlockUtils::connectBlocks(PreheaderVPBB, Region); - - // Disconnect exit block from exiting (=latch) block, set exiting block and - // connect region to exit block. - VPBasicBlock *ExitVPBB = getOrCreateVPBB(L->getExitBlock()); - VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB); - Region->setExiting(ExitingVPBB); - VPBlockUtils::connectBlocks(Region, ExitVPBB); - - // Queue sub-loops for processing. - LoopWorkList.append(L->begin(), L->end()); - } - // 4. The whole CFG has been built at this point so all the input Values must + // 3. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. fixPhiNodes();