Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -39,6 +39,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Vectorize.h" #include @@ -90,6 +91,10 @@ "slp-min-tree-size", cl::init(3), cl::Hidden, cl::desc("Only vectorize small trees if they are fully vectorizable")); +static cl::opt + ViewSLPTree("view-slp-tree", cl::Hidden, + cl::desc("Display the SLP trees with Graphviz")); + // Limit the number of alias checks. The limit is chosen so that // it has no negative effect on the llvm benchmarks. static const unsigned AliasedCheckLimit = 10; @@ -419,7 +424,7 @@ int getEntryCost(TreeEntry *E); /// This is the recursive part of buildTree. - void buildTree_rec(ArrayRef Roots, unsigned Depth); + void buildTree_rec(ArrayRef Roots, unsigned Depth, int); /// \returns True if the ExtractElement/ExtractValue instructions in VL can /// be vectorized to use the original vector (or aggregate "bitcast" to a vector). @@ -468,8 +473,9 @@ SmallVectorImpl &Left, SmallVectorImpl &Right); struct TreeEntry { - TreeEntry() : Scalars(), VectorizedValue(nullptr), - NeedToGather(0), NeedToShuffle(0) {} + TreeEntry(std::vector &Container) + : Scalars(), VectorizedValue(nullptr), NeedToGather(0), + NeedToShuffle(0), Container(Container), UserTreeIdx(-1) {} /// \returns true if the scalars in VL are equal to this entry. bool isSame(ArrayRef VL) const { @@ -499,12 +505,23 @@ /// Do we need to shuffle the load ? bool NeedToShuffle; + + /// Points back to the VectorizableTree. + /// + /// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has + /// to be a pointer and needs to be able to initialize the child iterator. + /// Thus we need a reference back to the container to translate the indices + /// to entries. + std::vector &Container; + + /// The TreeEntry index containing the user of this entry. + int UserTreeIdx; }; /// Create a new VectorizableTree entry. TreeEntry *newTreeEntry(ArrayRef VL, bool Vectorized, - bool NeedToShuffle) { - VectorizableTree.emplace_back(); + bool NeedToShuffle, int &UserTreeIdx) { + VectorizableTree.emplace_back(VectorizableTree); int idx = VectorizableTree.size() - 1; TreeEntry *Last = &VectorizableTree[idx]; Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); @@ -518,6 +535,9 @@ } else { MustGather.insert(VL.begin(), VL.end()); } + + Last->UserTreeIdx = UserTreeIdx; + UserTreeIdx = idx; return Last; } @@ -741,6 +761,8 @@ return os; } #endif + friend struct GraphTraits; + friend struct DOTGraphTraits; /// Contains all scheduling data for a basic block. /// @@ -951,9 +973,95 @@ /// original width. MapVector> MinBWs; }; +} // end namespace slpvectorizer + +template <> struct GraphTraits { + typedef BoUpSLP::TreeEntry TreeEntry; + + /// NodeRef has to be a pointer per the GraphWriter. + typedef TreeEntry *NodeRef; + + /// \brief A simple iterator over a (single-element) integer field + /// representing the tree index in VectorizableTree. + struct ChildIteratorType + : public iterator_facade_base { + + std::vector *VectorizableTree; + /// -1 indicates the end iterator. + int TreeIdx; + + ChildIteratorType(std::vector *VT, int TreeIdx = -1) + : VectorizableTree(VT), TreeIdx(TreeIdx) {} + + NodeRef operator*() { + assert(TreeIdx >= 0 && "End dereferenced"); + return &(*VectorizableTree)[TreeIdx]; + } + + ChildIteratorType &operator++() { + TreeIdx = -1; + return *this; + } + + ChildIteratorType &operator=(const ChildIteratorType &R) { + VectorizableTree = R.VectorizableTree; + TreeIdx = R.TreeIdx; + return *this; + } + + bool operator==(const ChildIteratorType &R) const { + return TreeIdx == R.TreeIdx; + } + }; + + static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; } + + static ChildIteratorType child_begin(NodeRef N) { + return {&N->Container, N->UserTreeIdx}; + } + static ChildIteratorType child_end(NodeRef N) { return {&N->Container}; } + + /// For the node iterator we just need to turn the TreeEntry iterator into a + /// TreeEntry* iterator so that it dereferences to NodeRef. + typedef pointer_iterator::iterator> nodes_iterator; + + static nodes_iterator nodes_begin(BoUpSLP *R) { + return nodes_iterator(R->VectorizableTree.begin()); + } + static nodes_iterator nodes_end(BoUpSLP *R) { + return nodes_iterator(R->VectorizableTree.end()); + } + + static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); } +}; + +template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { + typedef BoUpSLP::TreeEntry TreeEntry; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *) { + std::string Str; + raw_string_ostream OS(Str); + if (isSplat(Entry->Scalars)) { + OS << " " << *Entry->Scalars[0]; + return Str; + } + for (auto V : Entry->Scalars) + OS << *V << "\n"; + return Str; + } + + static std::string getNodeAttributes(const TreeEntry *Entry, + const BoUpSLP *) { + if (Entry->NeedToGather) + return "color=red"; + return ""; + } +}; } // end namespace llvm -} // end namespace slpvectorizer void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { @@ -967,7 +1075,7 @@ UserIgnoreList = UserIgnoreLst; if (!allSameType(Roots)) return; - buildTree_rec(Roots, 0); + buildTree_rec(Roots, 0, -1); // Collect the values that we need to extract from the tree. for (TreeEntry &EIdx : VectorizableTree) { @@ -1025,28 +1133,28 @@ } } - -void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { +void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, + int UserTreeIdx) { bool isAltShuffle = false; assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); if (Depth == RecursionMaxDepth) { DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } // Don't handle vectors. if (VL[0]->getType()->isVectorTy()) { DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } if (StoreInst *SI = dyn_cast(VL[0])) if (SI->getValueOperand()->getType()->isVectorTy()) { DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } unsigned Opcode = getSameOpcode(VL); @@ -1063,7 +1171,7 @@ // If all of the operands are identical or constant we have a simple solution. if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) { DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } @@ -1075,7 +1183,7 @@ if (EphValues.count(VL[i])) { DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << ") is ephemeral.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } @@ -1088,7 +1196,7 @@ DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n"); if (E->Scalars[i] != VL[i]) { DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } @@ -1101,7 +1209,7 @@ if (ScalarToTreeEntry.count(VL[i])) { DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << ") is already in tree.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } @@ -1111,7 +1219,7 @@ for (unsigned i = 0, e = VL.size(); i != e; ++i) { if (MustGather.count(VL[i])) { DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } @@ -1125,7 +1233,7 @@ // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } @@ -1134,7 +1242,7 @@ for (unsigned j = i+1; j < e; ++j) if (VL[i] == VL[j]) { DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } @@ -1149,7 +1257,7 @@ assert((!BS.getScheduleData(VL[0]) || !BS.getScheduleData(VL[0])->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); @@ -1166,12 +1274,12 @@ if (Term) { DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { @@ -1181,7 +1289,7 @@ Operands.push_back(cast(j)->getIncomingValueForBlock( PH->getIncomingBlock(i))); - buildTree_rec(Operands, Depth + 1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } @@ -1193,7 +1301,7 @@ } else { BS.cancelScheduling(VL); } - newTreeEntry(VL, Reuse, false); + newTreeEntry(VL, Reuse, false, UserTreeIdx); return; } case Instruction::Load: { @@ -1209,7 +1317,7 @@ if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; } @@ -1220,7 +1328,7 @@ LoadInst *L = cast(VL[i]); if (!L->isSimple()) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; } @@ -1240,7 +1348,7 @@ if (Consecutive) { ++NumLoadsWantToKeepOrder; - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of loads.\n"); return; } @@ -1266,14 +1374,14 @@ } } if (ShuffledLoads) { - newTreeEntry(NewVL, true, true); + newTreeEntry(NewVL, true, true, UserTreeIdx); return; } } } BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); if (ReverseConsecutive) { ++NumLoadsWantToChangeOrder; @@ -1300,12 +1408,12 @@ Type *Ty = cast(Val)->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; } } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of casts.\n"); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { @@ -1314,7 +1422,7 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - buildTree_rec(Operands, Depth+1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } @@ -1328,13 +1436,13 @@ if (Cmp->getPredicate() != P0 || Cmp->getOperand(0)->getType() != ComparedTy) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; } } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of compares.\n"); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { @@ -1343,7 +1451,7 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - buildTree_rec(Operands, Depth+1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } @@ -1366,7 +1474,7 @@ case Instruction::And: case Instruction::Or: case Instruction::Xor: { - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); // Sort operands of the instructions so that each side is more likely to @@ -1374,8 +1482,8 @@ if (isa(VL0) && VL0->isCommutative()) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right); - buildTree_rec(Left, Depth + 1); - buildTree_rec(Right, Depth + 1); + buildTree_rec(Left, Depth + 1, UserTreeIdx); + buildTree_rec(Right, Depth + 1, UserTreeIdx); return; } @@ -1385,7 +1493,7 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - buildTree_rec(Operands, Depth+1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } @@ -1395,7 +1503,7 @@ if (cast(Val)->getNumOperands() != 2) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } @@ -1408,7 +1516,7 @@ if (Ty0 != CurTy) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } @@ -1420,12 +1528,12 @@ DEBUG( dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); return; } } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); for (unsigned i = 0, e = 2; i < e; ++i) { ValueList Operands; @@ -1433,7 +1541,7 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - buildTree_rec(Operands, Depth + 1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } @@ -1442,19 +1550,19 @@ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of stores.\n"); ValueList Operands; for (Value *j : VL) Operands.push_back(cast(j)->getOperand(0)); - buildTree_rec(Operands, Depth + 1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); return; } case Instruction::Call: { @@ -1465,7 +1573,7 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } @@ -1479,7 +1587,7 @@ getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); return; @@ -1490,7 +1598,7 @@ Value *A1J = CI2->getArgOperand(1); if (A1I != A1J) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument "<< A1I<<"!=" << A1J << "\n"); @@ -1503,14 +1611,14 @@ CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *VL[i] << '\n'); return; } } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. @@ -1518,7 +1626,7 @@ CallInst *CI2 = dyn_cast(j); Operands.push_back(CI2->getArgOperand(i)); } - buildTree_rec(Operands, Depth + 1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } @@ -1527,19 +1635,19 @@ // then do not vectorize this instruction. if (!isAltShuffle) { BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; } - newTreeEntry(VL, true, false); + newTreeEntry(VL, true, false, UserTreeIdx); DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. if (isa(VL0)) { ValueList Left, Right; reorderAltShuffleOperands(VL, Left, Right); - buildTree_rec(Left, Depth + 1); - buildTree_rec(Right, Depth + 1); + buildTree_rec(Left, Depth + 1, UserTreeIdx); + buildTree_rec(Right, Depth + 1, UserTreeIdx); return; } @@ -1549,13 +1657,13 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - buildTree_rec(Operands, Depth + 1); + buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; } default: BS.cancelScheduling(VL); - newTreeEntry(VL, false, false); + newTreeEntry(VL, false, false, UserTreeIdx); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; } @@ -2022,9 +2130,20 @@ int SpillCost = getSpillCost(); Cost += SpillCost + ExtractCost; - DEBUG(dbgs() << "SLP: Spill Cost = " << SpillCost << ".\n" - << "SLP: Extract Cost = " << ExtractCost << ".\n" - << "SLP: Total Cost = " << Cost << ".\n"); + std::string Str; +#ifndef NDEBUG + { + raw_string_ostream OS(Str); + OS << "SLP: Spill Cost = " << SpillCost << ".\n" + << "SLP: Extract Cost = " << ExtractCost << ".\n" + << "SLP: Total Cost = " << Cost << ".\n"; + } + DEBUG(dbgs() << Str); +#endif + + if (ViewSLPTree) + ViewGraph(this, "SLP" + F->getName(), false, Str); + return Cost; }