Index: lib/Transforms/Vectorize/CMakeLists.txt =================================================================== --- lib/Transforms/Vectorize/CMakeLists.txt +++ lib/Transforms/Vectorize/CMakeLists.txt @@ -4,6 +4,8 @@ SLPVectorizer.cpp Vectorize.cpp VPlan.cpp + VPlanHCFGBuilder.cpp + VPlanVerifier.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms Index: lib/Transforms/Vectorize/LoopVectorizationPlanner.h =================================================================== --- lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -39,23 +39,94 @@ VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator(); VPInstruction *createInstruction(unsigned Opcode, - std::initializer_list Operands) { + ArrayRef Operands) { VPInstruction *Instr = new VPInstruction(Opcode, Operands); - BB->insert(Instr, InsertPt); + if (BB) + BB->insert(Instr, InsertPt); return Instr; } + VPInstruction *createInstruction(unsigned Opcode, + std::initializer_list Operands) { + return createInstruction(Opcode, ArrayRef(Operands)); + } + public: VPBuilder() {} - /// \brief This specifies that created VPInstructions should be appended to - /// the end of the specified block. + /// Clear the insertion point: created instructions will not be inserted into + /// a block. + void clearInsertionPoint() { + BB = nullptr; + InsertPt = VPBasicBlock::iterator(); + } + + VPBasicBlock *getInsertBlock() const { return BB; } + VPBasicBlock::iterator getInsertPoint() const { return InsertPt; } + + /// InsertPoint - A saved insertion point. + class VPInsertPoint { + VPBasicBlock *Block = nullptr; + VPBasicBlock::iterator Point; + + public: + /// Creates a new insertion point which doesn't point to anything. + VPInsertPoint() = default; + + /// Creates a new insertion point at the given location. + VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint) + : Block(InsertBlock), Point(InsertPoint) {} + + /// Returns true if this insert point is set. + bool isSet() const { return (Block != nullptr); } + + VPBasicBlock *getBlock() const { return Block; } + VPBasicBlock::iterator getPoint() const { return Point; } + }; + + /// Sets the current insert point to a previously-saved location. + void restoreIP(VPInsertPoint IP) { + if (IP.isSet()) + setInsertPoint(IP.getBlock(), IP.getPoint()); + else + clearInsertionPoint(); + } + + /// This specifies that created VPInstructions should be appended to the end + /// of the specified block. void setInsertPoint(VPBasicBlock *TheBB) { assert(TheBB && "Attempting to set a null insert point"); BB = TheBB; InsertPt = BB->end(); } + /// This specifies that created instructions should be inserted at the + /// specified point. + void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) { + BB = TheBB; + InsertPt = IP; + } + + /// Insert and return the specified instruction. + VPInstruction *insert(VPInstruction *I) const { + BB->insert(I, InsertPt); + return I; + } + + /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as + /// its underlying Instruction. + VPValue *createNaryOp(unsigned Opcode, ArrayRef Operands, + Instruction *Inst = nullptr) { + VPInstruction *NewVPInst = createInstruction(Opcode, Operands); + NewVPInst->setUnderlyingValue(Inst); + return NewVPInst; + } + VPValue *createNaryOp(unsigned Opcode, + std::initializer_list Operands, + Instruction *Inst = nullptr) { + return createNaryOp(Opcode, ArrayRef(Operands), Inst); + } + VPValue *createNot(VPValue *Operand) { return createInstruction(VPInstruction::Not, {Operand}); } @@ -67,6 +138,29 @@ VPValue *createOr(VPValue *LHS, VPValue *RHS) { return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}); } + + //===--------------------------------------------------------------------===// + // RAII helpers. + //===--------------------------------------------------------------------===// + + /// RAII object that stores the current insertion point and restores it when + /// the object is destroyed. + class InsertPointGuard { + VPBuilder &Builder; + VPBasicBlock* Block; + VPBasicBlock::iterator Point; + + public: + InsertPointGuard(VPBuilder &B) + : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {} + + InsertPointGuard(const InsertPointGuard &) = delete; + InsertPointGuard &operator=(const InsertPointGuard &) = delete; + + ~InsertPointGuard() { + Builder.restoreIP(VPInsertPoint(Block, Point)); + } + }; }; Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -56,6 +56,7 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "LoopVectorizationPlanner.h" +#include "VPlanHCFGBuilder.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -264,6 +265,17 @@ cl::desc("Enable VPlan-native vectorization path with " "support for outer loop vectorization.")); +// This flag enables the stress testing of the VPlan H-CFG construction in the +// VPlan-native vectorization path. It must be used in conjuction with +// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the +// verification of the H-CFGs built. +static cl::opt VPlanBuildStressTest( + "vplan-build-stress-test", cl::init(false), cl::Hidden, + cl::desc( + "Build VPlan for every supported loop nest in the function and bail " + "out right after the build (stress test the VPlan H-CFG construction " + "in the VPlan-native vectorization path).")); + /// Create an analysis remark that explains why vectorization failed /// /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p @@ -2355,8 +2367,11 @@ OptimizationRemarkEmitter *ORE, SmallVectorImpl &V) { // Collect inner loops and outer loops without irreducible control flow. For - // now, only collect outer loops that have explicit vectorization hints. - if (L.empty() || (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) { + // now, only collect outer loops that have explicit vectorization hints. If we + // are stress testing the VPlan H-CFG construction, we collect the outermost + // loop of every loop nest. + if (L.empty() || VPlanBuildStressTest || + (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) { LoopBlocksRPO RPOT(&L); RPOT.perform(LI); if (!containsIrreducibleCFG(RPOT, *LI)) { @@ -7691,12 +7706,22 @@ // Since we cannot modify the incoming IR, we need to build VPlan upfront in // the vectorization pipeline. if (!OrigLoop->empty()) { + // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing. + // This won't be necessary when UserVF is not required in the VPlan-native + // path. + if (VPlanBuildStressTest && !UserVF) + UserVF = 4; + assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); assert(UserVF && "Expected UserVF for outer loop vectorization."); assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two"); DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n"); buildVPlans(UserVF, UserVF); + // For VPlan build stress testing, we bail out after VPlan construction. + if (VPlanBuildStressTest) + return NoVectorization; + return {UserVF, 0}; } @@ -8233,9 +8258,11 @@ "VPBB has successors when handling predicated replication."); // Record predicated instructions for above packing optimizations. PredInst2Recipe[I] = Recipe; - VPBlockBase *Region = - VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan)); - return cast(Region->setOneSuccessor(new VPBasicBlock())); + VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); + VPBlockUtils::insertBlockAfter(Region, VPBB); + auto *RegSucc = new VPBasicBlock(); + VPBlockUtils::insertBlockAfter(RegSucc, Region); + return RegSucc; } VPRegionBlock * @@ -8261,8 +8288,8 @@ // Note: first set Entry as region entry and then connect successors starting // from it in order, to propagate the "parent" of each VPBasicBlock. - Entry->setTwoSuccessors(Pred, Exit); - Pred->setOneSuccessor(Exit); + VPBlockUtils::insertTwoBlocksAfter(Pred, Exit, Entry); + VPBlockUtils::connectBlocks(Pred, Exit); return Region; } @@ -8279,6 +8306,11 @@ // Create new empty VPlan auto Plan = llvm::make_unique(); + + // Build hierarchical CFG + VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI); + HCFGBuilder.buildHierarchicalCFG(*Plan.get()); + return Plan; } @@ -8320,7 +8352,7 @@ // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - VPBB->setOneSuccessor(FirstVPBBForBB); + VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); VPBB = FirstVPBBForBB; Builder.setInsertPoint(VPBB); @@ -8424,7 +8456,7 @@ VPBasicBlock *PreEntry = cast(Plan->getEntry()); assert(PreEntry->empty() && "Expecting empty pre-entry block."); VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor()); - PreEntry->disconnectSuccessor(Entry); + VPBlockUtils::disconnectBlocks(PreEntry, Entry); delete PreEntry; std::string PlanName; Index: lib/Transforms/Vectorize/VPlan.h =================================================================== --- lib/Transforms/Vectorize/VPlan.h +++ lib/Transforms/Vectorize/VPlan.h @@ -306,6 +306,8 @@ /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. /// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock. class VPBlockBase { + friend class VPBlockUtils; + private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -372,6 +374,7 @@ /// for any other purpose, as the values may change as LLVM evolves. unsigned getVPBlockID() const { return SubclassID; } + VPRegionBlock *getParent() { return Parent; } const VPRegionBlock *getParent() const { return Parent; } void setParent(VPRegionBlock *P) { Parent = P; } @@ -406,6 +409,9 @@ return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr); } + size_t getNumSuccessors() const { return Successors.size(); } + size_t getNumPredecessors() const { return Predecessors.size(); } + /// An Enclosing Block of a block B is any block containing B, including B /// itself. \return the closest enclosing block starting from "this", which /// has successors. \return the root enclosing block if all enclosing blocks @@ -449,34 +455,31 @@ return getEnclosingBlockWithPredecessors()->getSinglePredecessor(); } - /// Sets a given VPBlockBase \p Successor as the single successor and \return - /// \p Successor. The parent of this Block is copied to be the parent of - /// \p Successor. - VPBlockBase *setOneSuccessor(VPBlockBase *Successor) { + /// Set a given VPBlockBase \p Successor as the single successor of this + /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor. + /// This VPBlockBase must have no successors. + void setOneSuccessor(VPBlockBase *Successor) { assert(Successors.empty() && "Setting one successor when others exist."); appendSuccessor(Successor); - Successor->appendPredecessor(this); - Successor->Parent = Parent; - return Successor; } - /// Sets two given VPBlockBases \p IfTrue and \p IfFalse to be the two - /// successors. The parent of this Block is copied to be the parent of both - /// \p IfTrue and \p IfFalse. + /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two + /// successors of this VPBlockBase. This VPBlockBase is not added as + /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no + /// successors. void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) { assert(Successors.empty() && "Setting two successors when others exist."); appendSuccessor(IfTrue); appendSuccessor(IfFalse); - IfTrue->appendPredecessor(this); - IfFalse->appendPredecessor(this); - IfTrue->Parent = Parent; - IfFalse->Parent = Parent; } - void disconnectSuccessor(VPBlockBase *Successor) { - assert(Successor && "Successor to disconnect is null."); - removeSuccessor(Successor); - Successor->removePredecessor(this); + /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase. + /// This VPBlockBase must have no predecessors. This VPBlockBase is not added + /// as successor of any VPBasicBlock in \p NewPreds. + void setPredecessors(ArrayRef NewPreds) { + assert(Predecessors.empty() && "Block predecessors already set."); + for (auto *Pred : NewPreds) + appendPredecessor(Pred); } /// The method which generates the output IR that correspond to this @@ -554,10 +557,13 @@ void generateInstruction(VPTransformState &State, unsigned Part); public: - VPInstruction(unsigned Opcode, std::initializer_list Operands) + VPInstruction(unsigned Opcode, ArrayRef Operands) : VPUser(VPValue::VPInstructionSC, Operands), VPRecipeBase(VPRecipeBase::VPInstructionSC), Opcode(Opcode) {} + VPInstruction(unsigned Opcode, std::initializer_list Operands) + : VPInstruction(Opcode, ArrayRef(Operands)) {} + /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPValue *V) { return V->getVPValueID() == VPValue::VPInstructionSC; @@ -963,6 +969,9 @@ Entry->setParent(this); Exit->setParent(this); } + VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) + : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exit(nullptr), + IsReplicator(IsReplicator) {} ~VPRegionBlock() override { if (Entry) @@ -977,9 +986,27 @@ const VPBlockBase *getEntry() const { return Entry; } VPBlockBase *getEntry() { return Entry; } + /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p + /// EntryBlock must have no predecessors. + void setEntry(VPBlockBase *EntryBlock) { + assert(EntryBlock->getPredecessors().empty() && + "Entry block cannot have predecessors."); + Entry = EntryBlock; + EntryBlock->setParent(this); + } + const VPBlockBase *getExit() const { return Exit; } VPBlockBase *getExit() { return Exit; } + /// Set \p ExitBlock as the exit VPBlockBase of this VPRegionBlock. \p + /// ExitBlock must have no successors. + void setExit(VPBlockBase *ExitBlock) { + assert(ExitBlock->getSuccessors().empty() && + "Exit block cannot have successors."); + Exit = ExitBlock; + ExitBlock->setParent(this); + } + /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. bool isReplicator() const { return IsReplicator; } @@ -1007,6 +1034,13 @@ /// Holds the name of the VPlan, for printing. std::string Name; + /// Holds all the external definitions created for this VPlan. + // TODO: Introduce a specific representation for external definitions in + // VPlan. External definitions must be immutable and hold a pointer to its + // underlying IR that will be used to implement its structural comparison + // (operators '==' and '<'). + SmallSet VPExternalDefs; + /// Holds a mapping between Values and their corresponding VPValue inside /// VPlan. Value2VPValueTy Value2VPValue; @@ -1037,6 +1071,12 @@ void setName(const Twine &newName) { Name = newName.str(); } + /// Add \p VPVal to the pool of external definitions if it's not already + /// in the pool. + void addExternalDef(VPValue *VPVal) { + VPExternalDefs.insert(VPVal); + } + void addVPValue(Value *V) { assert(V && "Trying to add a null Value to VPlan"); assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); @@ -1184,6 +1224,70 @@ } }; +//===----------------------------------------------------------------------===// +// VPlan Utilities +//===----------------------------------------------------------------------===// + +/// Class that provides utilities for VPBlockBases in VPlan. +class VPBlockUtils { +public: + VPBlockUtils() = delete; + + /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p + /// NewBlock as successor of \p BlockPtr and \p Block as predecessor of \p + /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p NewBlock + /// must have neither successors nor predecessors. + static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { + assert(NewBlock->getSuccessors().empty() && + "Can't insert new block with successors."); + // TODO: move successors from BlockPtr to NewBlock when this functionality + // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr + // already has successors. + BlockPtr->setOneSuccessor(NewBlock); + NewBlock->setPredecessors({BlockPtr}); + NewBlock->setParent(BlockPtr->getParent()); + } + + /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p + /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p + /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr + /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors + /// and \p IfTrue and \p IfFalse must have neither successors nor + /// predecessors. + static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, + VPBlockBase *BlockPtr) { + assert(IfTrue->getSuccessors().empty() && + "Can't insert IfTrue with successors."); + assert(IfFalse->getSuccessors().empty() && + "Can't insert IfFalse with successors."); + BlockPtr->setTwoSuccessors(IfTrue, IfFalse); + IfTrue->setPredecessors({BlockPtr}); + IfFalse->setPredecessors({BlockPtr}); + IfTrue->setParent(BlockPtr->getParent()); + IfFalse->setParent(BlockPtr->getParent()); + } + + /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to + /// the successors of \p From and \p From to the predecessors of \p To. Both + /// VPBlockBases must have the same parent, which can be null. Both + /// VPBlockBases can be already connected to other VPBlockBases. + static void connectBlocks(VPBlockBase *From, VPBlockBase *To) { + assert((From->getParent() == To->getParent()) && + "Can't connect two block with different parents"); + assert(From->getNumSuccessors() < 2 && + "Blocks can't have more than two successors."); + From->appendSuccessor(To); + To->appendPredecessor(From); + } + + /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To + /// from the successors of \p From and \p From from the predecessors of \p To. + static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To) { + assert(To && "Successor to disconnect is null."); + From->removeSuccessor(To); + To->removePredecessor(From); + } +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H Index: lib/Transforms/Vectorize/VPlanHCFGBuilder.h =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -0,0 +1,55 @@ +//===-- VPlanHCFGBuilder.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the VPlanHCFGBuilder class which contains the public +/// interface (buildHierarchicalCFG) to build a VPlan-based Hierarchical CFG +/// (H-CFG) for an incoming IR. +/// +/// A H-CFG in VPlan is a control-flow graph whose nodes are VPBasicBlocks +/// and/or VPRegionBlocks (i.e., other H-CFGs). The outermost H-CFG of a VPlan +/// consists of a VPRegionBlock, denoted Top Region, which encloses any other +/// VPBlockBase in the H-CFG. This guarantees that any VPBlockBase in the H-CFG +/// other than the Top Region will have a parent VPRegionBlock and allows us +/// to easily add more nodes before/after the main vector loop (such as the +/// reduction epilogue). +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H + +#include "VPlan.h" +#include "VPlanVerifier.h" + +namespace llvm { + +class Loop; + +/// Main class to build the VPlan H-CFG for an incoming IR. +class VPlanHCFGBuilder { +private: + // The outermost loop of the input loop nest considered for vectorization. + Loop *TheLoop; + + // Loop Info analysis. + LoopInfo *LI; + + // VPlan verifier utility. + VPlanVerifier Verifier; + +public: + VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI) : TheLoop(Lp), LI(LI) {} + + /// Build H-CFG for TheLoop and update \p Plan accordingly. + void buildHierarchicalCFG(VPlan &Plan); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_VPLANHCFGBUILDER_H Index: lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -0,0 +1,320 @@ +//===-- VPlanHCFGBuilder.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the construction of a VPlan-based Hierarchical CFG +/// (H-CFG) for an incoming IR. This construction comprises the following +/// components and steps: +// +/// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that +/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top +/// Region) is created to enclose and serve as parent of all the VPBasicBlocks +/// in the plain CFG. +/// NOTE: At this point, there is a direct correspondence between all the +/// VPBasicBlocks created for the initial plain CFG and the incoming +/// BasicBlocks. However, this might change in the future. +/// +//===----------------------------------------------------------------------===// + +#include "VPlanHCFGBuilder.h" +#include "LoopVectorizationPlanner.h" +#include "llvm/Analysis/LoopIterator.h" + +#define DEBUG_TYPE "vplan-hcfg-builder" + +using namespace llvm; + +// Class that is used to build the plain CFG for the incoming IR. +class PlainCFGBuilder { +private: + // The outermost loop of the input loop nest considered for vectorization. + Loop *TheLoop; + + // Loop Info analysis. + LoopInfo *LI; + + // Vectorization plan that we are working on. + VPlan &Plan; + + // Output Top Region. + VPRegionBlock *TopRegion = nullptr; + + // Builder of the VPlan instruction-level representation. + VPBuilder VPIRBuilder; + + // NOTE: The following maps are intentionally destroyed after the plain CFG + // construction because subsequent VPlan-to-VPlan transformation may + // invalidate them. + // Map incoming BasicBlocks to their newly-created VPBasicBlocks. + DenseMap BB2VPBB; + // Map incoming Value definitions to their newly-created VPValues. + DenseMap IRDef2VPValue; + + // Hold phi node's that need to be fixed once the plain CFG has been built. + SmallVector PhisToFix; + + // Utility functions. + void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); + void fixPhiNodes(); + VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); + bool isExternalDef(Value *Val); + VPValue *getOrCreateVPOperand(Value *IRVal); + void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB); + +public: + PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) + : TheLoop(Lp), LI(LI), Plan(P) {} + + // Build the plain CFG and return its Top Region. + VPRegionBlock *buildPlainCFG(); +}; + +// Return true if \p Inst is an incoming Instruction to be ignored in the VPlan +// representation. +static bool isInstructionToIgnore(Instruction *Inst) { + return isa(Inst); +} + +// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB +// must have no predecessors. +void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { + SmallVector VPBBPreds; + // Collect VPBB predecessors. + for (BasicBlock *Pred : predecessors(BB)) + VPBBPreds.push_back(getOrCreateVPBB(Pred)); + + VPBB->setPredecessors(VPBBPreds); +} + +// Add operands to VPInstructions representing phi nodes from the input IR. +void PlainCFGBuilder::fixPhiNodes() { + for (auto *Phi : PhisToFix) { + assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode."); + VPValue * VPVal = IRDef2VPValue[Phi]; + assert(isa(VPVal) && "Expected VPInstruction for phi node."); + auto * VPPhi = cast(VPVal); + assert(VPPhi->getNumOperands() == 0 && + "Expected VPInstruction with no operands."); + + for (Value *Op : Phi->operands()) + VPPhi->addOperand(getOrCreateVPOperand(Op)); + } +} + +// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an +// existing one if it was already created. +VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { + auto BlockIt = BB2VPBB.find(BB); + if (BlockIt != BB2VPBB.end()) + // Retrieve existing VPBB. + return BlockIt->second; + + // Create new VPBB. + DEBUG(dbgs() << "Creating VPBasicBlock for " << BB->getName() << "\n"); + VPBasicBlock *VPBB = new VPBasicBlock(BB->getName()); + BB2VPBB[BB] = VPBB; + VPBB->setParent(TopRegion); + return VPBB; +} + +// Return true if \p Val is considered an external definition. An external +// definition is either: +// 1. A Value that is not an Instruction. This will be refined in the future. +// 2. An Instruction that is outside of the CFG snippet represented in VPlan, +// i.e., is not part of: a) the loop nest, b) outermost loop PH and, c) +// outermost loop exits. +bool PlainCFGBuilder::isExternalDef(Value *Val) { + // All the Values that are not Instructions are considered external + // definitions for now. + Instruction *Inst = dyn_cast(Val); + if (!Inst) + return true; + + BasicBlock *InstParent = Inst->getParent(); + assert(InstParent && "Expected instruction parent."); + + // Check whether Instruction definition is in loop PH. + BasicBlock *PH = TheLoop->getLoopPreheader(); + assert(PH && "Expected loop pre-header."); + + if (InstParent == PH) + // Instruction definition is in outermost loop PH. + return false; + + // Check whether Instruction definition is in the loop exit. + BasicBlock *Exit = TheLoop->getUniqueExitBlock(); + assert(Exit && "Expected loop with single exit."); + if (InstParent == Exit) { + // Instruction definition is in outermost loop exit. + return false; + } + + // Check whether Instruction definition is in loop body. + return !TheLoop->contains(Inst); +} + +// Create a new VPValue or retrieve an existing one for the Instruction's +// operand \p IRVal. This function must only be used to create/retrieve VPValues +// for *Instruction's operands* and not to create regular VPInstruction's. For +// the latter, please, look at 'createVPInstructionsForVPBB'. +VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) { + auto VPValIt = IRDef2VPValue.find(IRVal); + if (VPValIt != IRDef2VPValue.end()) + // Operand has an associated VPInstruction or VPValue that was previously + // created. + return VPValIt->second; + + // Operand doesn't have a previously created VPInstruction/VPValue. This + // means that operand is: + // A) a definition external to VPlan, + // B) any other Value without specific representation in VPlan. + // For now, we use VPValue to represent A and B and classify both as external + // definitions. We may introduce specific VPValue subclasses for them in the + // future. + assert(isExternalDef(IRVal) && "Expected external definition as operand."); + + // A and B: Create VPValue and add it to the pool of external definitions and + // to the Value->VPValue map. + VPValue *NewVPVal = new VPValue(IRVal); + Plan.addExternalDef(NewVPVal); + IRDef2VPValue[IRVal] = NewVPVal; + return NewVPVal; +} + +// Create new VPInstructions in a VPBasicBlock, given its BasicBlock +// counterpart. This function must be invoked in RPO so that the operands of a +// VPInstruction in \p BB have been visited before (except for Phi nodes). +void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, + BasicBlock *BB) { + VPIRBuilder.setInsertPoint(VPBB); + for (Instruction &InstRef : *BB) { + Instruction *Inst = &InstRef; + if (isInstructionToIgnore(Inst)) + continue; + + // There should't be any VPValue for Inst at this point. Otherwise, we + // visited Inst when we shouldn't, breaking the RPO traversal order. + assert(!IRDef2VPValue.count(Inst) && + "Instruction shouldn't have been visited."); + + VPInstruction *NewVPInst; + if (PHINode *Phi = dyn_cast(Inst)) { + // Phi node's operands may have not been visited at this point. We create + // an empty VPInstruction that we will fix once the whole plain CFG has + // been built. + NewVPInst = cast(VPIRBuilder.createNaryOp( + Inst->getOpcode(), {} /*No operands*/, Inst)); + PhisToFix.push_back(Phi); + } else { + // Translate LLVM-IR operands into VPValue operands and set them in the + // new VPInstruction. + SmallVector VPOperands; + for (Value *Op : Inst->operands()) + VPOperands.push_back(getOrCreateVPOperand(Op)); + + // Build VPInstruction for any arbitraty Instruction without specific + // representation in VPlan. + NewVPInst = cast( + VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst)); + } + + IRDef2VPValue[Inst] = NewVPInst; + } +} + +// Main interface to build the plain CFG. +VPRegionBlock *PlainCFGBuilder::buildPlainCFG() { + // 1. Create the Top Region. It will be the parent of all VPBBs. + TopRegion = new VPRegionBlock("TopRegion", false /*isReplicator*/); + + // 2. Scan the body of the loop in a topological order to visit each basic + // block after having visited its predecessor basic blocks.Create a VPBB for + // each BB and link it to its successor and predecessor VPBBs. Note that + // predecessors must be set in the same order as they are in the incomming IR. + // Otherwise, there might be problems with existing phi nodes and algorithm + // based on predecessors traversal. + + // Loop PH needs to be explicitly visited since it's not taken into account by + // LoopBlocksDFS. + BasicBlock *PreheaderBB = TheLoop->getLoopPreheader(); + assert((PreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); + VPBasicBlock *PreheaderVPBB = getOrCreateVPBB(PreheaderBB); + createVPInstructionsForVPBB(PreheaderVPBB, PreheaderBB); + // Create empty VPBB for Loop H so that we can link PH->H. + VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader()); + // Preheader's predecessors will be set during the loop RPO traversal below. + PreheaderVPBB->setOneSuccessor(HeaderVPBB); + + LoopBlocksRPO RPO(TheLoop); + RPO.perform(LI); + + for (BasicBlock *BB : RPO) { + // Create or retrieve the VPBasicBlock for this BB and create its + // VPInstructions. + VPBasicBlock *VPBB = getOrCreateVPBB(BB); + createVPInstructionsForVPBB(VPBB, BB); + + // Set VPBB successors. We create empty VPBBs for successors if they don't + // exist already. Recipes will be created when the successor is visited + // during the RPO traversal. + TerminatorInst *TI = BB->getTerminator(); + assert(TI && "Terminator expected."); + unsigned NumSuccs = TI->getNumSuccessors(); + + if (NumSuccs == 1) { + VPBasicBlock *SuccVPBB = getOrCreateVPBB(TI->getSuccessor(0)); + assert(SuccVPBB && "VPBB Successor not found."); + VPBB->setOneSuccessor(SuccVPBB); + } else if (NumSuccs == 2) { + VPBasicBlock *SuccVPBB0 = getOrCreateVPBB(TI->getSuccessor(0)); + assert(SuccVPBB0 && "Successor 0 not found."); + VPBasicBlock *SuccVPBB1 = getOrCreateVPBB(TI->getSuccessor(1)); + assert(SuccVPBB1 && "Successor 1 not found."); + VPBB->setTwoSuccessors(SuccVPBB0, SuccVPBB1); + } else + llvm_unreachable("Number of successors not supported."); + + // Set VPBB predecessors in the same order as they are in the incoming BB. + setVPBBPredsFromBB(VPBB, BB); + } + + // 3. Process outermost loop exit. We created an empty VPBB for the loop + // single exit BB during the RPO traversal of the loop body but Instructions + // weren't visited because it's not part of the the loop. + BasicBlock *LoopExitBB = TheLoop->getUniqueExitBlock(); + assert(LoopExitBB && "Loops with multiple exits are not supported."); + VPBasicBlock *LoopExitVPBB = BB2VPBB[LoopExitBB]; + createVPInstructionsForVPBB(LoopExitVPBB, LoopExitBB); + // Loop exit was already set as successor of the loop exiting BB. + // We only set its predecessor VPBB now. + setVPBBPredsFromBB(LoopExitVPBB, LoopExitBB); + + // 4. The whole CFG has been built at this point so all the input Values must + // have a VPlan couterpart. Fix VPlan phi nodes by adding their corresponding + // VPlan operands. + fixPhiNodes(); + + // 5. Final Top Region setup. Set outermost loop pre-header and single exit as + // Top Region entry and exit. + TopRegion->setEntry(PreheaderVPBB); + TopRegion->setExit(LoopExitVPBB); + return TopRegion; +} + +// Public interface to build a H-CFG. +void VPlanHCFGBuilder::buildHierarchicalCFG(VPlan &Plan) { + // Build Top Region enclosing the plain CFG and set it as VPlan entry. + PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); + VPRegionBlock *TopRegion = PCFGBuilder.buildPlainCFG(); + Plan.setEntry(TopRegion); + DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); + + Verifier.verifyHierarchicalCFG(TopRegion); +} Index: lib/Transforms/Vectorize/VPlanValue.h =================================================================== --- lib/Transforms/Vectorize/VPlanValue.h +++ lib/Transforms/Vectorize/VPlanValue.h @@ -37,13 +37,34 @@ // coming from the input IR, instructions which VPlan will generate if executed // and live-outs which the VPlan will need to fix accordingly. class VPValue { + friend class VPBuilder; private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). SmallVector Users; protected: - VPValue(const unsigned char SC) : SubclassID(SC) {} + // Hold the underlying Value, if any, attached to this VPValue. + Value * UnderlyingVal; + + VPValue(const unsigned char SC, Value *UV = nullptr) + : SubclassID(SC), UnderlyingVal(UV) {} + + // DESIGN PRINCIPLE: Access to the underlying IR must be strictly limited to + // the front-end and back-end of VPlan so that the middle-end is as + // independent as possible of the underlying IR. We grant access to the + // underlying IR using friendship. In that way, we should be able to use VPlan + // for multiple underlying IRs (Polly?) by providing a new VPlan front-end, + // back-end and analysis information for the new IR. + + /// Return the underlying Value attached to this VPValue. + Value *getUnderlyingValue() { return UnderlyingVal; } + + // Set \p Val as the underlying Value of this VPValue. + void setUnderlyingValue(Value *Val) { + assert(!UnderlyingVal && "Underlying Value is already set."); + UnderlyingVal = Val; + } public: /// An enumeration for keeping track of the concrete subclass of VPValue that @@ -52,7 +73,7 @@ /// type identification. enum { VPValueSC, VPUserSC, VPInstructionSC }; - VPValue() : SubclassID(VPValueSC) {} + VPValue(Value *UV = nullptr) : VPValue(VPValueSC, UV) {} VPValue(const VPValue &) = delete; VPValue &operator=(const VPValue &) = delete; @@ -94,11 +115,6 @@ private: SmallVector Operands; - void addOperand(VPValue *Operand) { - Operands.push_back(Operand); - Operand->addUser(*this); - } - protected: VPUser(const unsigned char SC) : VPValue(SC) {} VPUser(const unsigned char SC, ArrayRef Operands) : VPValue(SC) { @@ -120,6 +136,11 @@ V->getVPValueID() <= VPInstructionSC; } + void addOperand(VPValue *Operand) { + Operands.push_back(Operand); + Operand->addUser(*this); + } + unsigned getNumOperands() const { return Operands.size(); } inline VPValue *getOperand(unsigned N) const { assert(N < Operands.size() && "Operand index out of bounds"); Index: lib/Transforms/Vectorize/VPlanVerifier.h =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanVerifier.h @@ -0,0 +1,44 @@ +//===-- VPlanVerifier.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares the class VPlanVerifier, which contains utility functions +/// to check the consistency of a VPlan. This includes the following kinds of +/// invariants: +/// +/// 1. Region/Block invariants: +/// - Region's entry/exit block must have no predecessors/successors, +/// respectively. +/// - Block's parent must be the region immediately containing the block. +/// - Linked blocks must have a bi-directional link (successor/predecessor). +/// - All predecessors/successors of a block must belong to the same region. +/// - Blocks must have no duplicated successor/predecessor. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLANVERIFIER_H +#define LLVM_TRANSFORMS_VECTORIZE_VPLANVERIFIER_H + +#include "VPlan.h" + +namespace llvm { + +/// Class with utility functions that can be used to check the consistency and +/// invariants of a VPlan, including the components of its H-CFG. +class VPlanVerifier { +public: + /// Verify the invariants of the H-CFG starting from \p TopRegion. The + /// verification process comprises the following steps: + /// 1. Region/Block verification: Check the Region/Block verification + /// invariants for every region in the H-CFG. + void verifyHierarchicalCFG(const VPRegionBlock *TopRegion) const; +}; +} // namespace llvm + +#endif //LLVM_TRANSFORMS_VECTORIZE_VPLANVERIFIER_H Index: lib/Transforms/Vectorize/VPlanVerifier.cpp =================================================================== --- /dev/null +++ lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -0,0 +1,125 @@ +//===-- VPlanVerifier.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the class VPlanVerifier, which contains utility functions +/// to check the consistency and invariants of a VPlan. +/// +//===----------------------------------------------------------------------===// + +#include "VPlanVerifier.h" +#include "llvm/ADT/DepthFirstIterator.h" + +#define DEBUG_TYPE "vplan-verifier" + +using namespace llvm; + +static cl::opt EnableHCFGVerifier("vplan-verify-hcfg", cl::init(false), + cl::Hidden, + cl::desc("Verify VPlan H-CFG.")); + +/// Utility function that checks whether \p VPBlockVec has duplicate +/// VPBlockBases. +static bool hasDuplicates(const SmallVectorImpl &VPBlockVec) { + SmallDenseSet VPBlockSet; + for (const auto *Block : VPBlockVec) { + if (VPBlockSet.count(Block)) + return true; + VPBlockSet.insert(Block); + } + return false; +} + +/// Helper function that verifies the CFG invariants of the VPBlockBases within +/// \p Region. Checks in this function are generic for VPBlockBases. They are +/// not specific for VPBasicBlocks or VPRegionBlocks. +static void verifyBlocksInRegion(const VPRegionBlock *Region) { + for (const VPBlockBase *VPB : + make_range(df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExit()))) { + // Check block's parent. + assert(VPB->getParent() == Region && "VPBlockBase has wrong parent"); + + // Check block's successors. + const auto &Successors = VPB->getSuccessors(); + // There must be only one instance of a successor in block's successor list. + // TODO: This won't work for switch statements. + assert(!hasDuplicates(Successors) && + "Multiple instances of the same successor."); + + for (const VPBlockBase *Succ : Successors) { + // There must be a bi-directional link between block and successor. + const auto &SuccPreds = Succ->getPredecessors(); + assert(std::find(SuccPreds.begin(), SuccPreds.end(), VPB) != + SuccPreds.end() && + "Missing predecessor link."); + (void)SuccPreds; + } + + // Check block's predecessors. + const auto &Predecessors = VPB->getPredecessors(); + // There must be only one instance of a predecessor in block's predecessor + // list. + // TODO: This won't work for switch statements. + assert(!hasDuplicates(Predecessors) && + "Multiple instances of the same predecessor."); + + for (const VPBlockBase *Pred : Predecessors) { + // Block and predecessor must be inside the same region. + assert(Pred->getParent() == VPB->getParent() && + "Predecessor is not in the same region."); + + // There must be a bi-directional link between block and predecessor. + const auto &PredSuccs = Pred->getSuccessors(); + assert(std::find(PredSuccs.begin(), PredSuccs.end(), VPB) != + PredSuccs.end() && + "Missing successor link."); + (void)PredSuccs; + } + } +} + +/// Verify the CFG invariants of VPRegionBlock \p Region and its nested +/// VPBlockBases. Do not recurse inside nested VPRegionBlocks. +static void verifyRegion(const VPRegionBlock *Region) { + const VPBlockBase *Entry = Region->getEntry(); + const VPBlockBase *Exit = Region->getExit(); + + // Entry and Exit shouldn't have any predecessor/successor, respectively. + assert(!Entry->getNumPredecessors() && "Region entry has predecessors."); + assert(!Exit->getNumSuccessors() && "Region exit has successors."); + (void)Entry; + (void)Exit; + + verifyBlocksInRegion(Region); +} + +/// Verify the CFG invariants of VPRegionBlock \p Region and its nested +/// VPBlockBases. Recurse inside nested VPRegionBlocks. +static void verifyRegionRec(const VPRegionBlock *Region) { + verifyRegion(Region); + + // Recurse inside nested regions. + for (const VPBlockBase *VPB : + make_range(df_iterator::begin(Region->getEntry()), + df_iterator::end(Region->getExit()))) { + if (const auto *SubRegion = dyn_cast(VPB)) + verifyRegionRec(SubRegion); + } +} + +void VPlanVerifier::verifyHierarchicalCFG( + const VPRegionBlock *TopRegion) const { + if (!EnableHCFGVerifier) + return; + + DEBUG(dbgs() << "Verifying VPlan H-CFG.\n"); + assert(!TopRegion->getParent() && "VPlan Top Region should have no parent."); + verifyRegionRec(TopRegion); +} Index: test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -vplan-verify-hcfg -debug-only=vplan-verifier -disable-output 2>&1 | FileCheck %s -check-prefix=VERIFIER +; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -vplan-build-stress-test -debug-only=vplan-verifier -disable-output 2>&1 | FileCheck %s -check-prefix=NO-VERIFIER -allow-empty +; REQUIRES: asserts + +; Verify that the stress testing flag for the VPlan H-CFG builder works as +; expected with and without enabling the VPlan H-CFG Verifier. + +; VERIFIER: Verifying VPlan H-CFG. +; NO-VERIFIER-NOT: Verifying VPlan H-CFG. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr #0 { +entry: + %cmp32 = icmp sgt i32 %N, 0 + br i1 %cmp32, label %outer.ph, label %for.end15 + +outer.ph: + %cmp230 = icmp sgt i32 %M, 0 + %0 = sext i32 %M to i64 + %wide.trip.count = zext i32 %M to i64 + %wide.trip.count38 = zext i32 %N to i64 + br label %outer.body + +outer.body: + %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ] + br i1 %cmp230, label %inner.ph, label %outer.inc + +inner.ph: + %1 = mul nsw i64 %indvars.iv35, %0 + br label %inner.body + +inner.body: + %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ] + %2 = add nsw i64 %indvars.iv, %1 + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2 + %3 = load i32, i32* %arrayidx, align 4 + %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2 + store i32 %3, i32* %arrayidx12, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %outer.inc, label %inner.body + +outer.inc: + %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1 + %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38 + br i1 %exitcond39, label %for.end15, label %outer.body + +for.end15: + ret void +} + +attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }