Index: lib/Transforms/Vectorize/CMakeLists.txt =================================================================== --- lib/Transforms/Vectorize/CMakeLists.txt +++ lib/Transforms/Vectorize/CMakeLists.txt @@ -4,6 +4,8 @@ SLPVectorizer.cpp Vectorize.cpp VPlan.cpp + VPlanHCFGBuilder.cpp + VPlanVerifier.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -56,6 +56,7 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "LoopVectorizationPlanner.h" +#include "VPlanHCFGBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -264,6 +265,16 @@ cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization.")); +// This flag can be used for stress testing the VPlan H-CFG construction in the +// VPlan-native vectorization path. It must be used in conjuction with +// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the +// verification of the H-CFGs built. +static cl::opt VPlanBuildStressTest( + "vplan-build-stress-test", cl::init(false), cl::Hidden, + cl::desc("Build VPlan for every supported loop nest in the function and " + "bail out (stress test the VPlan HCFG construction in the " + "VPlan-native vectorization path).")); + /// Create an analysis remark that explains why vectorization failed /// /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p @@ -2365,7 +2376,8 @@ SmallVectorImpl &V) { // Collect inner loops and outer loops without irreducible control flow. For // now, only collect outer loops that have explicit vectorization hints. - if (L.empty() || (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) { + if (L.empty() || VPlanBuildStressTest || + (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) { LoopBlocksRPO RPOT(&L); RPOT.perform(LI); if (!containsIrreducibleCFG(RPOT, *LI)) { @@ -7685,6 +7697,12 @@ DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); buildVPlans(UserVF, UserVF); + // For VPlan build stress testing, we bail out after VPlan construction. + // TODO: Consider including future predication and cost modeling as part of + // stress testing. + if (VPlanBuildStressTest) + return {UserVF, 0}; + return {UserVF, 0}; } @@ -8214,9 +8232,11 @@ "VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. PredInst2Recipe[I] = Recipe; - VPBlockBase *Region = - VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan)); - return cast(Region->setOneSuccessor(new VPBasicBlock())); + VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); + VPBlockUtils::insertBlockAfter(Region, VPBB); + auto *RegSucc = new VPBasicBlock(); + VPBlockUtils::insertBlockAfter(RegSucc, Region); + return RegSucc; } VPRegionBlock * @@ -8242,8 +8262,8 @@ // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. - Entry->setTwoSuccessors(Pred, Exit); - Pred->setOneSuccessor(Exit); + VPBlockUtils::insertTwoBlocksAfter(Pred, Exit, Entry); + VPBlockUtils::connectBlocks(Pred, Exit); return Region; } @@ -8260,6 +8280,11 @@ // Create new empty VPlan auto Plan = llvm::make_unique(); + + // Build hierarchical CFG + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); + HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + return Plan; } @@ -8301,7 +8326,7 @@ // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - VPBB->setOneSuccessor(FirstVPBBForBB); + VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); @@ -8405,7 +8430,7 @@ VPBasicBlock *PreEntry = cast(Plan->getEntry()); assert(PreEntry->empty() && "Expecting empty pre-entry block."); VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor()); - PreEntry->disconnectSuccessor(Entry); + VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; std::string PlanName; @@ -8669,11 +8694,18 @@ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM); // Get user vectorization factor. - unsigned UserVF = Hints.getWidth(); + // TODO: We set UserVF to 4 for stress testing. This won't be necessary when + // UserVF is not required in the VPlan-native path. + unsigned UserVF = VPlanBuildStressTest ? 4 : Hints.getWidth(); // Plan how to best vectorize, return the best VF and its cost. LVP.plan(OptForSize, UserVF); + // For VPlan build stress testing, we bail out after planning so that we + // don't execute any plan. + if (VPlanBuildStressTest) + return false; + return false; } Index: lib/Transforms/Vectorize/VPlan.h =================================================================== --- lib/Transforms/Vectorize/VPlan.h +++ lib/Transforms/Vectorize/VPlan.h @@ -306,6 +306,8 @@ /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. class VPBlockBase { + friend class VPBlockUtils; + private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -372,6 +374,7 @@ /// for any other purpose, as the values may change as LLVM evolves. unsigned getVPBlockID() const { return SubclassID; } + VPRegionBlock *getParent() { return Parent; } const VPRegionBlock *getParent() const { return Parent; } void setParent(VPRegionBlock *P) { Parent = P; } @@ -406,6 +409,9 @@ return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); } + size_t getNumSuccessors() const { return Successors.size(); } + size_t getNumPredecessors() const { return Predecessors.size(); } + /// An Enclosing Block of a block B is any block containing B, including B /// itself. \return the closest enclosing block starting from "this", which /// has successors. \return the root enclosing block if all enclosing blocks @@ -449,34 +455,31 @@ return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); } - /// Sets a given VPBlockBase \p Successor as the single successor and \return - /// \p Successor. The parent of this Block is copied to be the parent of - /// \p Successor. - VPBlockBase *setOneSuccessor(VPBlockBase *Successor) { + /// Set a given VPBlockBase \p Successor as the single successor of this + /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. + /// This VPBlockBase must have no successors. + void setOneSuccessor(VPBlockBase *Successor) { assert(Successors.empty() && "Setting one successor when others exist."); appendSuccessor(Successor); - Successor->appendPredecessor(this); - Successor->Parent = Parent; - return Successor; } - /// Sets two given VPBlockBases \p IfTrue and \p IfFalse to be the two - /// successors. The parent of this Block is copied to be the parent of both - /// \p IfTrue and \p IfFalse. + /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two + /// successors of this VPBlockBase. This VPBlockBase is not added as + /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no + /// successors. void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) { assert(Successors.empty() && "Setting two successors when others exist."); appendSuccessor(IfTrue); appendSuccessor(IfFalse); - IfTrue->appendPredecessor(this); - IfFalse->appendPredecessor(this); - IfTrue->Parent = Parent; - IfFalse->Parent = Parent; } - void disconnectSuccessor(VPBlockBase *Successor) { - assert(Successor && "Successor to disconnect is null."); - removeSuccessor(Successor); - Successor->removePredecessor(this); + /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. + /// This VPBlockBase must have no predecessors. This VPBlockBase is not added + /// as successor of any VPBasicBlock in \p NewPreds. + void setPredecessors(ArrayRef NewPreds) { + assert(Predecessors.empty() && "Block predecessors already set."); + for (auto *Pred : NewPreds) + appendPredecessor(Pred); } /// The method which generates the output IR that correspond to this @@ -963,6 +966,9 @@ Entry->setParent(this); Exit->setParent(this); } + VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) + : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr), + IsReplicator(IsReplicator) {} ~VPRegionBlock() override { if (Entry) @@ -977,9 +983,27 @@ const VPBlockBase *getEntry() const { return Entry; } VPBlockBase *getEntry() { return Entry; } + /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p + /// EntryBlock must have no predecessors. + void setEntry(VPBlockBase *EntryBlock) { + assert(EntryBlock->getPredecessors().empty() && + "Entry block cannot have predecessors."); + Entry = EntryBlock; + EntryBlock->setParent(this); + } + const VPBlockBase *getExit() const { return Exit; } VPBlockBase *getExit() { return Exit; } + /// Set \p EntryBlock as the exit VPBlockBase of this VPRegionBlock. \p + /// ExitBlock must have no successors. + void setExit(VPBlockBase *ExitBlock) { + assert(ExitBlock->getSuccessors().empty() && + "Exit block cannot have successors."); + Exit = ExitBlock; + ExitBlock->setParent(this); + } + /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. bool isReplicator() const { return IsReplicator; } @@ -1184,6 +1208,70 @@ } }; +//===----------------------------------------------------------------------===// +// VPlan Utilities +//===----------------------------------------------------------------------===// + +/// Class that provides utilities for VPBlockBases in VPlan. +class VPBlockUtils { +public: + VPBlockUtils() = delete; + + /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p + /// NewBlock as successor of \p BlockPtr and \p Block as predecessor of \p + /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p NewBlock + /// must have neither successors nor predecessors. + static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { + assert(NewBlock->getSuccessors().empty() && + "Can't insert new block with successors."); + // TODO: move successors from BlockPtr to NewBlock when this functionality + // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr + // already has successors. + BlockPtr->setOneSuccessor(NewBlock); + NewBlock->setPredecessors({BlockPtr}); + NewBlock->setParent(BlockPtr->getParent()); + } + + /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p + /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p + /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr + /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors + /// and \p IfTrue and \p IfFalse must have neither successors nor + /// predecessors. + static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, + VPBlockBase *BlockPtr) { + assert(IfTrue->getSuccessors().empty() && + "Can't insert IfTrue with successors."); + assert(IfFalse->getSuccessors().empty() && + "Can't insert IfFalse with successors."); + BlockPtr->setTwoSuccessors(IfTrue, IfFalse); + IfTrue->setPredecessors({BlockPtr}); + IfFalse->setPredecessors({BlockPtr}); + IfTrue->setParent(BlockPtr->getParent()); + IfFalse->setParent(BlockPtr->getParent()); + } + + /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to + /// the successors of \p From and \p From to the predecessors of \p To. Both + /// VPBlockBases must have the same parent, which can be null. Both + /// VPBlockBases can be already connected to other VPBlockBases. + static void connectBlocks(VPBlockBase *From, VPBlockBase *To) { + assert((From->getParent() == To->getParent()) && + "Can't connect two block with different parents"); + assert(From->getNumSuccessors() < 2 && + "Blocks can't have more than two successors."); + From->appendSuccessor(To); + To->appendPredecessor(From); + } + + /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To + /// from the successors of \p From and \p From from the predecessors of \p To. + static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) { + assert(To && "Successor to disconnect is null."); + From->removeSuccessor(To); + To->removePredecessor(From); + } +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H Index: lib/Transforms/Vectorize/VPlanHCFGBuilder.h =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -0,0 +1,57 @@ +//===-- VPlanHCFGBuilder.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the VPlanHCFGBuilder class which contains the public +/// interface (buildHierarchicalCFG) to build a VPlan-based Hierarchical CFG +/// (H-CFG) for an incoming IR. +/// +/// A H-CFG in VPlan is a control-flow graph whose nodes are VPBasicBlocks +/// and/or VPRegionBlocks (i.e., other H-CFGs). The outermost H-CFG of a VPlan +/// consists of a VPRegionBlock, denoted Top Region, which encloses any other +/// VPBlockBase in the H-CFG. This guarantees that any VPBlockBase in the H-CFG +/// other than the Top Region will have a parent VPRegionBlock and allows us +/// to easily add more nodes before/after the main vector loop (such as the +/// reduction epilogue). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H + +#include "VPlan.h" +#include "VPlanVerifier.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class ScalarEvolution; +class Loop; + +/// Main class to build the VPlan H-CFG for an incoming IR. +class VPlanHCFGBuilder { +private: + // The outermost loop of the input loop nest considered for vectorization. + Loop *TheLoop; + + // Loop Info analysis. + LoopInfo *LI; + + // VPlan verifier utility. + VPlanVerifier Verifier; + +public: + VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} + + /// Build H-CFG for TheLoop and update \p Plan accordingly. + void buildHierarchicalCFG(VPlan &Plan); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H Index: lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -0,0 +1,200 @@ +//===-- VPlanHCFGBuilder.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the construction of a VPlan-based Hierarchical CFG +/// (H-CFG) for an incoming IR. This construction comprises the following +/// components and steps: +// +/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that +/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top +/// Region) is created to enclose and serve as parent of all the VPBasicBlocks +/// in the plain CFG. +/// NOTE: At this point, there is a direct correspondence between all the +/// VPBasicBlocks created for the initial plain CFG and the incoming +/// BasicBlocks. However, this might change in the future. +/// +//===----------------------------------------------------------------------===// + +#include "VPlanHCFGBuilder.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/ScalarEvolution.h" + +#define DEBUG_TYPE "vplan-hcfg-builder" + +using namespace llvm; + +// Class that is used to build the plain CFG for the incoming IR. +class PlainCFGBuilder { +private: + // The outermost loop of the input loop nest considered for vectorization. + Loop *TheLoop; + + // Loop Info analysis. + LoopInfo *LI; + + // Output Top Region. + VPRegionBlock *TopRegion = nullptr; + + // Map incoming BasicBlocks to newly-created VPBasicBlocks. This map is + // intentionally destroyed after the plain CFG construction because subsequent + // VPlan-to-VPlan transformation may invalidate the mapping with the incoming + // IR. + DenseMap BB2VPBB; + + // Utility functions. + void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); + VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); + void createRecipesForVPBB(VPBasicBlock *VPBB, BasicBlock *BB); + +public: + PlainCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} + + // Build the plain CFG and return its Top Region. + VPRegionBlock *buildPlainCFG(); +}; + +// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB +// must have no predecessors. +void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { + SmallVector VPBBPreds; + // Collect VPBB predecessors. + for (BasicBlock *Pred : predecessors(BB)) + VPBBPreds.push_back(getOrCreateVPBB(Pred)); + + VPBB->setPredecessors(VPBBPreds); +} + +// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an +// existing one if it was already created. +VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { + auto BlockIt = BB2VPBB.find(BB); + if (BlockIt != BB2VPBB.end()) + // Retrieve existing VPBB. + return BlockIt->second; + + // Create new VPBB. + DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); + VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); + BB2VPBB[BB] = VPBB; + VPBB->setParent(TopRegion); + return VPBB; +} + +// Create and insert VPWidenRecipes and VPWidenMemoryInstructionRecipes in +// VPBasicBlock \p VPBB for Instructions in incoming BasicBlock \p BB. +// TODO: We are introducing these two kind of recipes to mimic what the inner +// loop vectorization path is currently doing. This is a stepping stone to the +// VPInstruction model. These recipes will be replaced with VPInstruction in the +// future. +void PlainCFGBuilder::createRecipesForVPBB(VPBasicBlock *VPBB, BasicBlock *BB) { + for (Instruction &Inst : *BB) { + // Create VPWidenMemoryInstructionRecipe for loads and stores. + if (isa(Inst) || isa(Inst)) { + VPBB->appendRecipe( + new VPWidenMemoryInstructionRecipe(Inst, nullptr /*Mask*/)); + continue; + } + + // Create VPWidenRecipe to widen this instruction. We optimize the common + // case where consecutive instructions can be represented by a single + // recipe. + if (!VPBB->empty()) { + VPWidenRecipe *LastWidenRecipe = dyn_cast(&VPBB->back()); + if (LastWidenRecipe && LastWidenRecipe->appendInstruction(&Inst)) + continue; + } + + VPBB->appendRecipe(new VPWidenRecipe(&Inst)); + } +} + +// Main interface to build the plain CFG. +VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { + // 1. Create the Top Region. It will be the parent of all VPBBs. + TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/); + + // 2. Scan the loop body in topological order to visit each BB after having + // visited its predecessors. Create a VPBB for each BB and link it to its + // successor and predecessor VPBBs. Note that predecessors must be set in the + // same order as they are in the incomming IR. Otherwise, there might be + // problems with existing phi nodes and algorithm based on predecessors + // traversal. + LoopBlocksRPO RPO(TheLoop); + RPO.perform(LI); + + for (BasicBlock *BB : RPO) { + // Create or retrieve the VPBasicBlock for this BB and create its recipes. + VPBasicBlock *VPBB = getOrCreateVPBB(BB); + createRecipesForVPBB(VPBB, BB); + + // Set VPBB successors. We create empty VPBBs for successors if they don't + // exist already. Recipes will be created when the successor is visited + // during the RPO traversal. + TerminatorInst *TI = BB->getTerminator(); + assert(TI && "Terminator expected."); + unsigned NumSuccs = TI->getNumSuccessors(); + + if (NumSuccs == 1) { + VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0)); + assert(SuccVPBB && "VPBB Successor not found."); + VPBB->setOneSuccessor(SuccVPBB); + } else if (NumSuccs == 2) { + VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0)); + assert(SuccVPBB0 && "Successor 0 not found."); + VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1)); + assert(SuccVPBB1 && "Successor 1 not found."); + VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1); + } else + llvm_unreachable("Number of successors not supported."); + + // Set VPBB predecessors in the same order as they are in the incoming BB. + setVPBBPredsFromBB(VPBB, BB); + } + + // 3. Process outermost loop pre-header and exit. They weren't visited + // during the RPO traversal of the loop body. + BasicBlock *PreheaderBB = TheLoop->getLoopPreheader(); + assert((PreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop pre-header"); + VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(PreheaderBB); + createRecipesForVPBB(PreheaderVPBB, PreheaderBB); + VPBlockBase *HeaderVPBB = BB2VPBB[TheLoop->getHeader()]; + // Pre-header was already set as Header's predecessor during RPO traversal. + // We only set its successor VPBB now. + PreheaderVPBB->setOneSuccessor(HeaderVPBB); + + // We created an empty VPBB for the loop single exit BB during the RPO + // traversal of the loop body but it wasn't processed because it's not part of + // the the loop. + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; + createRecipesForVPBB(LoopExitVPBB, LoopExitBB); + // Loop exit was already set as successor of the loop exiting BB. + // We only set its predecessor VPBB now. + setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); + + // 4. Final Top Region setup. Set outermost loop pre-header and single exit as + // Top Region entry and exit. + TopRegion->setEntry(PreheaderVPBB); + TopRegion->setExit(LoopExitVPBB); + return TopRegion; +} + +// Public interface to build a H-CFG. +void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { + // Build Top Region enclosing the plain CFG and set it as VPlan entry. + PlainCFGBuilder PCFGBuilder(TheLoop, LI); + VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); + Plan.setEntry(TopRegion); + DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); + + Verifier.verifyHierarchicalCFG(TopRegion); +} Index: lib/Transforms/Vectorize/VPlanVerifier.h =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanVerifier.h @@ -0,0 +1,42 @@ +//===-- VPlanVerifier.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declare the class VPlanVerifier, which contains utility functions +/// to check the consistency and invariants of a VPlan. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANVERIFIER_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANVERIFIER_H + +#include "VPlan.h" + +namespace llvm { + +/// Class with utility functions that can be used to check the consistency and +/// invariants of a VPlan, including the components of its H-CFG. +class VPlanVerifier { +public: + // TODO: This class will have class member variables in the future. + + /// Verify that the invariants of the H-CFG starting from \p TopRegion. The + /// verification process comprises the following steps: + /// 1. VPRegionBlock verification: Check the following main CFG invariants for + /// every region of the H-CFG: + /// - Entry/Exit have no predecessors/successors, repectively. + /// - Nested blocks' parent is the region itself. + /// - Linked blocks have a bi-directional link (successor/predecessor). + /// - All predecessors/successors are inside the same region. + /// - Blocks have no duplicated successor/predecessor. + void verifyHierarchicalCFG(const VPRegionBlock *TopRegion) const; +}; +} // namespace llvm + +#endif //LLVM_TRANSFORMS_VECTORIZE_VPLANVERIFIER_H Index: lib/Transforms/Vectorize/VPlanVerifier.cpp =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -0,0 +1,124 @@ +//===-- VPlanVerifier.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the class VPlanVerifier, which contains utility functions +/// to check the consistency and invariants of a VPlan. +/// +//===----------------------------------------------------------------------===// + +#include "VPlanVerifier.h" +#include "llvm/ADT/DepthFirstIterator.h" + +#define DEBUG_TYPE "vplan-verifier" + +using namespace llvm; + +static cl::opt EnableHCFGVerifier("vplan-verify-hcfg", cl::init(false), + cl::Hidden, + cl::desc("Verify VPlan H-CFG.")); + +/// Utility function that checks whether \p VPBlockVec has duplicate +/// VPBlockBases. +static bool hasDuplicates(const SmallVectorImpl &VPBlockVec) { + SmallDenseSet VPBlockSet; + for (const auto *Block : VPBlockVec) { + if (VPBlockSet.count(Block)) + return true; + VPBlockSet.insert(Block); + } + return false; +} + +/// Helper function that verifies the CFG invariants of the VPBlockBases within +/// \p Region. Checks in this function are generic for VPBlockBases. They are +/// not specific for VPBasicBlocks or VPRegionBlocks. +static void verifyBlocksInRegion(const VPRegionBlock *Region) { + for (const VPBlockBase *VPB : + make_range(df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExit()))) { + // Check block's parent. + assert(VPB->getParent() == Region && "VPBlockBase has wrong parent"); + + // Check block's successors. + const auto &Successors = VPB->getSuccessors(); + // There must be only one instance of a successor in block's successor list. + // TODO: This won't work for switch statements. + assert(!hasDuplicates(Successors) && + "Multiple instances of the same successor."); + + for (const VPBlockBase *Succ : Successors) { + // There must be a bi-directional link between block and successor. + const auto &SuccPreds = Succ->getPredecessors(); + assert(std::find(SuccPreds.begin(), SuccPreds.end(), VPB) != + SuccPreds.end() && + "Missing predecessor link."); + (void)SuccPreds; + } + + // Check block's predecessors. + const auto &Predecessors = VPB->getPredecessors(); + // There must be only one instance of a predecessor in block's predecessor + // list. + // TODO: This won't work for switch statements. + assert(!hasDuplicates(Predecessors) && + "Multiple instances of the same predecessor."); + + for (const VPBlockBase *Pred : Predecessors) { + // Block and predecessor must be inside the same region. + assert(Pred->getParent() == VPB->getParent() && + "Predecessor is not in the same region."); + + // There must be a bi-directional link between block and predecessor. + const auto &PredSuccs = Pred->getSuccessors(); + assert(std::find(PredSuccs.begin(), PredSuccs.end(), VPB) != + PredSuccs.end() && + "Missing successor link."); + (void)PredSuccs; + } + } +} + +/// Verify the CFG invariants of VPRegionBlock \p Region and its nested +/// VPBlockBases. Do not recurse inside nested VPRegionBlocks. +static void verifyRegion(const VPRegionBlock *Region) { + const VPBlockBase *Entry = Region->getEntry(); + const VPBlockBase *Exit = Region->getExit(); + + // Entry and Exit shouldn't have any predecessor/successor, respectively. + assert(!Entry->getNumPredecessors() && "Region entry has predecessors."); + assert(!Exit->getNumSuccessors() && "Region exit has successors."); + + verifyBlocksInRegion(Region); +} + +/// Verify the CFG invariants of VPRegionBlock \p Region and its nested +/// VPBlockBases. Recurse inside nested VPRegionBlocks. +static void verifyRegionRec(const VPRegionBlock *Region) { + verifyRegion(Region); + + // Recurse inside nested regions. + for (const VPBlockBase *VPB : + make_range(df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExit()))) { + if (const auto *SubRegion = dyn_cast(VPB)) + verifyRegionRec(SubRegion); + } +} + +void VPlanVerifier::verifyHierarchicalCFG( + const VPRegionBlock *TopRegion) const { + if (!EnableHCFGVerifier) { + return; + } + + DEBUG(dbgs() << "Verifying VPlan H-CFG.\n"); + assert(!TopRegion->getParent() && "VPlan Top Region should have no parent."); + verifyRegionRec(TopRegion); +} Index: test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -vplan-verify-hcfg -debug-only=vplan-verifier -disable-output -S 2>&1 | FileCheck %s -check-prefix=VERIFIER +; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=vplan-verifier -disable-output -S 2>&1 | FileCheck %s -check-prefix=NO-VERIFIER -allow-empty +; REQUIRES: asserts + +; Verify that the stress testing flag for the VPlan H-CFG builder works as +; expected with and without enabling the VPlan H-CFG Verifier. + +; VERIFIER: Verifying VPlan H-CFG. +; NO-VERIFIER-NOT: Verifying VPlan H-CFG. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr #0 { +entry: + %cmp32 = icmp sgt i32 %N, 0 + br i1 %cmp32, label %outer.ph, label %for.end15 + +outer.ph: + %cmp230 = icmp sgt i32 %M, 0 + %0 = sext i32 %M to i64 + %wide.trip.count = zext i32 %M to i64 + %wide.trip.count38 = zext i32 %N to i64 + br label %outer.body + +outer.body: + %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ] + br i1 %cmp230, label %inner.ph, label %outer.inc + +inner.ph: + %1 = mul nsw i64 %indvars.iv35, %0 + br label %inner.body + +inner.body: + %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ] + %2 = add nsw i64 %indvars.iv, %1 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2 + %3 = load i32, i32* %arrayidx, align 4 + %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %3, i32* %arrayidx12, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %outer.inc, label %inner.body + +outer.inc: + %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1 + %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38 + br i1 %exitcond39, label %for.end15, label %outer.body + +for.end15: + ret void +} + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }