diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -585,6 +585,8 @@ /// This VPBlockBase must have no successors. void setOneSuccessor(VPBlockBase *Successor) { assert(Successors.empty() && "Setting one successor when others exist."); + assert(Successor->getParent() == getParent() && + "connected blocks must have the same parent"); appendSuccessor(Successor); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -61,7 +61,9 @@ // Utility functions. void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); + void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB); void fixPhiNodes(); + VPBlockBase *getOrCreateVPB(BasicBlock *BB); VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); #ifndef NDEBUG bool isExternalDef(Value *Val); @@ -83,12 +85,40 @@ void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { SmallVector VPBBPreds; // Collect VPBB predecessors. - for (BasicBlock *Pred : predecessors(BB)) - VPBBPreds.push_back(getOrCreateVPBB(Pred)); + for (BasicBlock *Pred : predecessors(BB)) { + auto *PredVPBB = getOrCreateVPBB(Pred); + auto *PredRegion = PredVPBB->getParent(); + if (PredRegion == VPBB->getParent()) + VPBBPreds.push_back(PredVPBB); + else { + // VPBB is the exit block of PredVPBB's region. Its predecessor is that + // region. + assert(VPBB == cast(PredRegion->getSingleSuccessor()) && + "successor must already be set for PredRegion; it must have VPBB " + "as single successor"); + VPBBPreds.push_back(PredRegion); + } + } VPBB->setPredecessors(VPBBPreds); } +void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region, + BasicBlock *BB) { + // BB is a loop header block. Connect the region to the loop preheader, + // identified by checking if the parent doesn't match Region. + VPBasicBlock *PreheaderVPBB = nullptr; + for (BasicBlock *Pred : predecessors(BB)) { + auto *PredVPBB = getOrCreateVPBB(Pred); + if (PredVPBB->getParent() != Region) { + assert(!PreheaderVPBB && "more than one preheader?"); + PreheaderVPBB = PredVPBB; + } + } + + Region->setPredecessors({PreheaderVPBB}); +} + // Add operands to VPInstructions representing phi nodes from the input IR. void PlainCFGBuilder::fixPhiNodes() { for (auto *Phi : PhisToFix) { @@ -106,32 +136,53 @@ } } +static bool isHeaderBB(BasicBlock *BB, Loop *L) { + return L && BB == L->getHeader(); +} + +static bool isHeaderVPBB(VPBasicBlock *VPBB) { + return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB; +} + +VPBlockBase *PlainCFGBuilder::getOrCreateVPB(BasicBlock *BB) { + VPBasicBlock *VPBB = getOrCreateVPBB(BB); + if (isHeaderBB(BB, LI->getLoopFor(BB))) + return VPBB->getParent(); + return VPBB; +} + // Create a new empty VPBasicBlock for an incoming BasicBlock in the region // corresponding to the containing loop or retrieve an existing one if it was // already created. If no region exists yet for the loop containing \p BB, a new // one is created. VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { auto BlockIt = BB2VPBB.find(BB); - if (BlockIt != BB2VPBB.end()) + if (BlockIt != BB2VPBB.end()) { // Retrieve existing VPBB. return BlockIt->second; - - // Get or create a region for the loop containing BB. - Loop *CurrentLoop = LI->getLoopFor(BB); - VPRegionBlock *ParentR = nullptr; - if (CurrentLoop) { - auto Iter = Loop2Region.insert({CurrentLoop, nullptr}); - if (Iter.second) - Iter.first->second = new VPRegionBlock( - CurrentLoop->getHeader()->getName().str(), false /*isReplicator*/); - ParentR = Iter.first->second; } // Create new VPBB. LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); BB2VPBB[BB] = VPBB; - VPBB->setParent(ParentR); + + // Get or create a region for the loop containing BB. + Loop *LoopOfBB = LI->getLoopFor(BB); + VPRegionBlock *RegionOfVPBB = nullptr; + if (LoopOfBB) { + bool RegionExists = Loop2Region.contains(LoopOfBB); + if (RegionExists) { + RegionOfVPBB = Loop2Region[LoopOfBB]; + VPBB->setParent(RegionOfVPBB); + } else { + RegionOfVPBB = new VPRegionBlock(LoopOfBB->getHeader()->getName().str(), + false /*isReplicator*/); + RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); + RegionOfVPBB->setEntry(VPBB); + Loop2Region.insert({LoopOfBB, RegionOfVPBB}); + } + } return VPBB; } @@ -266,6 +317,9 @@ BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && "Unexpected loop preheader"); + // buildPlainCFG needs to be called after createInitialVPlan, which creates + // the initial skeleton (including the preheader VPBB). buildPlainCFG builds + // the CFG for the loop nest and hooks it up to the initial skeleton. VPBasicBlock *ThePreheaderVPBB = Plan.getEntry(); BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB; ThePreheaderVPBB->setName("vector.ph"); @@ -274,10 +328,11 @@ continue; IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I); } - // Create empty VPBB for Loop H so that we can link PH->H. + // Create region (and header block) for the outer loop, so that we can link + // PH->Region. VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); HeaderVPBB->setName("vector.body"); - ThePreheaderVPBB->setOneSuccessor(HeaderVPBB); + ThePreheaderVPBB->setOneSuccessor(HeaderVPBB->getParent()); LoopBlocksRPO RPO(TheLoop); RPO.perform(LI); @@ -286,39 +341,53 @@ // Create or retrieve the VPBasicBlock for this BB and create its // VPInstructions. VPBasicBlock *VPBB = getOrCreateVPBB(BB); + VPRegionBlock *Region = VPBB->getParent(); createVPInstructionsForVPBB(VPBB, BB); + Loop *LoopForBB = LI->getLoopFor(BB); + // Set VPBB predecessors in the same order as they are in the incoming BB. + if (isHeaderBB(BB, LoopForBB)) { + // BB is a loop header, set the predecessor for the region. + assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree"); + setRegionPredsFromBB(Region, BB); + } else + setVPBBPredsFromBB(VPBB, BB); // Set VPBB successors. We create empty VPBBs for successors if they don't // exist already. Recipes will be created when the successor is visited // during the RPO traversal. - Instruction *TI = BB->getTerminator(); - assert(TI && "Terminator expected."); - unsigned NumSuccs = TI->getNumSuccessors(); - - if (NumSuccs == 1) { - VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0)); - assert(SuccVPBB && "VPBB Successor not found."); - VPBB->setOneSuccessor(SuccVPBB); - } else if (NumSuccs == 2) { - VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0)); - assert(SuccVPBB0 && "Successor 0 not found."); - VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1)); - assert(SuccVPBB1 && "Successor 1 not found."); - - // Get VPBB's condition bit. - assert(isa(TI) && "Unsupported terminator!"); - // Look up the branch condition to get the corresponding VPValue - // representing the condition bit in VPlan (which may be in another VPBB). - assert(IRDef2VPValue.count(cast(TI)->getCondition()) && - "Missing condition bit in IRDef2VPValue!"); - - // Link successors. - VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1); - } else - llvm_unreachable("Number of successors not supported."); - - // Set VPBB predecessors in the same order as they are in the incoming BB. - setVPBBPredsFromBB(VPBB, BB); + auto *BI = cast(BB->getTerminator()); + SmallVector Successors; + for (BasicBlock *Succ : successors(BB)) + Successors.push_back(getOrCreateVPB(Succ)); + assert((Successors.size() == 1 || Successors.size() == 2) && + "block must have one or two successors"); + // Look up the branch condition to get the corresponding VPValue + // representing the condition bit in VPlan (which may be in another VPBB). + assert(Successors.size() == 1 || + IRDef2VPValue.contains(BI->getCondition()) && + "Missing condition bit in IRDef2VPValue!"); + + // If BB is the loop latch, update the region's exiting block and the + // successor of the region. + if (LoopForBB && BB == LoopForBB->getLoopLatch()) { + assert(Successors.size() == 2 && "latch must have 2 successors"); + VPBlockBase *ExitVPBB = + Successors[0] == Region ? Successors[1] : Successors[0]; + Region->setExiting(VPBB); + Region->setParent(ExitVPBB->getParent()); + // For the latch, we need to set the successor for the region. + Region->setOneSuccessor(ExitVPBB); + continue; + } + // Link successors. + if (Successors.size() == 1) + VPBB->setOneSuccessor(Successors[0]); + else { + assert(Successors.size() == 2 && "unexpected number of successors"); + assert(getOrCreateVPB(BI->getSuccessor(0)) == Successors[0] && + "true successor does not match first element in Successors"); + VPBB->setTwoSuccessors(Successors[0], Successors[1]); + } } // 2. Process outermost loop exit. We created an empty VPBB for the loop @@ -331,43 +400,7 @@ // We only set its predecessor VPBB now. setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); - // 3. Fix up region blocks for loops. For each loop, - // * use the header block as entry to the corresponding region, - // * use the latch block as exit of the corresponding region, - // * set the region as successor of the loop pre-header, and - // * set the exit block as successor to the region. - SmallVector LoopWorkList; - LoopWorkList.push_back(TheLoop); - while (!LoopWorkList.empty()) { - Loop *L = LoopWorkList.pop_back_val(); - BasicBlock *Header = L->getHeader(); - BasicBlock *Exiting = L->getLoopLatch(); - assert(Exiting == L->getExitingBlock() && - "Latch must be the only exiting block"); - VPRegionBlock *Region = Loop2Region[L]; - VPBasicBlock *HeaderVPBB = getOrCreateVPBB(Header); - VPBasicBlock *ExitingVPBB = getOrCreateVPBB(Exiting); - - // Disconnect backedge and pre-header from header. - VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(L->getLoopPreheader()); - VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPBB); - VPBlockUtils::disconnectBlocks(ExitingVPBB, HeaderVPBB); - - Region->setParent(PreheaderVPBB->getParent()); - Region->setEntry(HeaderVPBB); - VPBlockUtils::connectBlocks(PreheaderVPBB, Region); - - // Disconnect exit block from exiting (=latch) block, set exiting block and - // connect region to exit block. - VPBasicBlock *ExitVPBB = getOrCreateVPBB(L->getExitBlock()); - VPBlockUtils::disconnectBlocks(ExitingVPBB, ExitVPBB); - Region->setExiting(ExitingVPBB); - VPBlockUtils::connectBlocks(Region, ExitVPBB); - - // Queue sub-loops for processing. - LoopWorkList.append(L->begin(), L->end()); - } - // 4. The whole CFG has been built at this point so all the input Values must + // 3. The whole CFG has been built at this point so all the input Values must // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding // VPlan operands. fixPhiNodes();