Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1137,7 +1137,8 @@ const DataLayout &DL, ScalarEvolution &SE); struct TreeEntry { - TreeEntry(std::vector &Container) : Container(Container) {} + using VecTreeTy = SmallVector, 8>; + TreeEntry(VecTreeTy &Container) : Container(Container) {} /// \returns true if the scalars in VL are equal to this entry. bool isSame(ArrayRef VL) const { @@ -1170,7 +1171,7 @@ /// to be a pointer and needs to be able to initialize the child iterator. /// Thus we need a reference back to the container to translate the indices /// to entries. - std::vector &Container; + VecTreeTy &Container; /// The TreeEntry index containing the user of this entry. We can actually /// have multiple users so the data structure is not truly a tree. @@ -1202,8 +1203,8 @@ ArrayRef ReuseShuffleIndices) { if (UserTreeIdx.Idx >= 0) { auto &VectorizableTree = Container; - VectorizableTree[UserTreeIdx.Idx].setOperand(UserTreeIdx.EdgeIdx, OpVL, - ReuseShuffleIndices); + VectorizableTree[UserTreeIdx.Idx]->setOperand(UserTreeIdx.EdgeIdx, OpVL, + ReuseShuffleIndices); } } @@ -1258,13 +1259,13 @@ }; /// Create a new VectorizableTree entry. - void newTreeEntry(ArrayRef VL, bool Vectorized, - EdgeInfo &UserTreeIdx, - ArrayRef ReuseShuffleIndices = None, - ArrayRef ReorderIndices = None) { - VectorizableTree.emplace_back(VectorizableTree); + TreeEntry *newTreeEntry(ArrayRef VL, bool Vectorized, + EdgeInfo &UserTreeIdx, + ArrayRef ReuseShuffleIndices = None, + ArrayRef ReorderIndices = None) { + VectorizableTree.push_back(llvm::make_unique(VectorizableTree)); + TreeEntry *Last = VectorizableTree.back().get(); int idx = VectorizableTree.size() - 1; - TreeEntry *Last = &VectorizableTree[idx]; Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); Last->NeedToGather = !Vectorized; Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), @@ -1285,18 +1286,19 @@ Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices); UserTreeIdx.Idx = idx; + return Last; } /// -- Vectorization State -- /// Holds all of the tree entries. - std::vector VectorizableTree; + TreeEntry::VecTreeTy VectorizableTree; #ifndef NDEBUG /// Debug printer. LLVM_DUMP_METHOD void dumpVectorizableTree() const { for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) { dbgs() << Id << ".\n"; - VectorizableTree[Id].dump(); + VectorizableTree[Id]->dump(); dbgs() << "\n"; } } @@ -1305,14 +1307,14 @@ TreeEntry *getTreeEntry(Value *V) { auto I = ScalarToTreeEntry.find(V); if (I != ScalarToTreeEntry.end()) - return &VectorizableTree[I->second]; + return VectorizableTree[I->second].get(); return nullptr; } const TreeEntry *getTreeEntry(Value *V) const { auto I = ScalarToTreeEntry.find(V); if (I != ScalarToTreeEntry.end()) - return &VectorizableTree[I->second]; + return VectorizableTree[I->second].get(); return nullptr; } @@ -1822,21 +1824,25 @@ /// NodeRef has to be a pointer per the GraphWriter. using NodeRef = TreeEntry *; + using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy; + /// Add the VectorizableTree to the index iterator to be able to return /// TreeEntry pointers. struct ChildIteratorType : public iterator_adaptor_base< ChildIteratorType, SmallVector::iterator> { - std::vector &VectorizableTree; + ContainerTy &VectorizableTree; ChildIteratorType(SmallVector::iterator W, - std::vector &VT) + ContainerTy &VT) : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {} - NodeRef operator*() { return &VectorizableTree[I->Idx]; } + NodeRef operator*() { return VectorizableTree[I->Idx].get(); } }; - static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; } + static NodeRef getEntryNode(BoUpSLP &R) { + return R.VectorizableTree[0].get(); + } static ChildIteratorType child_begin(NodeRef N) { return {N->UserTreeIndices.begin(), N->Container}; @@ -1848,7 +1854,19 @@ /// For the node iterator we just need to turn the TreeEntry iterator into a /// TreeEntry* iterator so that it dereferences to NodeRef. - using nodes_iterator = pointer_iterator::iterator>; + class nodes_iterator { + using ItTy = ContainerTy::iterator; + ItTy It; + + public: + nodes_iterator(const ItTy &It2) : It(It2) {} + NodeRef operator*() { return It->get(); } + nodes_iterator operator++() { + ++It; + return *this; + } + bool operator!=(const nodes_iterator &N2) const { return N2.It != It; } + }; static nodes_iterator nodes_begin(BoUpSLP *R) { return nodes_iterator(R->VectorizableTree.begin()); @@ -1910,8 +1928,8 @@ buildTree_rec(Roots, 0, EdgeInfo()); // Collect the values that we need to extract from the tree. - for (TreeEntry &EIdx : VectorizableTree) { - TreeEntry *Entry = &EIdx; + for (auto &TEPtr : VectorizableTree) { + TreeEntry *Entry = TEPtr.get(); // No need to handle users of gathered values. if (Entry->NeedToGather) @@ -2154,7 +2172,7 @@ // we are not extending buildTree_rec() towards the operands. ValueList Op0; Op0.assign(VL.size(), VL0->getOperand(0)); - VectorizableTree.back().setOperand(0, Op0, ReuseShuffleIndicies); + VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies); return; } if (!CurrentOrder.empty()) { @@ -2176,7 +2194,7 @@ // we are not extending buildTree_rec() towards the operands. ValueList Op0; Op0.assign(VL.size(), VL0->getOperand(0)); - VectorizableTree.back().setOperand(0, Op0, ReuseShuffleIndicies); + VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies); return; } LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); @@ -3038,20 +3056,20 @@ << VectorizableTree.size() << " is fully vectorizable .\n"); // We only handle trees of heights 1 and 2. - if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather) + if (VectorizableTree.size() == 1 && !VectorizableTree[0]->NeedToGather) return true; if (VectorizableTree.size() != 2) return false; // Handle splat and all-constants stores. - if (!VectorizableTree[0].NeedToGather && - (allConstant(VectorizableTree[1].Scalars) || - isSplat(VectorizableTree[1].Scalars))) + if (!VectorizableTree[0]->NeedToGather && + (allConstant(VectorizableTree[1]->Scalars) || + isSplat(VectorizableTree[1]->Scalars))) return true; // Gathering cost would be too much for tiny trees. - if (VectorizableTree[0].NeedToGather || VectorizableTree[1].NeedToGather) + if (VectorizableTree[0]->NeedToGather || VectorizableTree[1]->NeedToGather) return false; return true; @@ -3082,14 +3100,14 @@ // live. When we see a call instruction that is not part of our tree, // query TTI to see if there is a cost to keeping values live over it // (for example, if spills and fills are required). - unsigned BundleWidth = VectorizableTree.front().Scalars.size(); + unsigned BundleWidth = VectorizableTree.front()->Scalars.size(); int Cost = 0; SmallPtrSet LiveValues; Instruction *PrevInst = nullptr; - for (const auto &N : VectorizableTree) { - Instruction *Inst = dyn_cast(N.Scalars[0]); + for (const auto &TEPtr : VectorizableTree) { + Instruction *Inst = dyn_cast(TEPtr->Scalars[0]); if (!Inst) continue; @@ -3147,10 +3165,10 @@ LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << VectorizableTree.size() << ".\n"); - unsigned BundleWidth = VectorizableTree[0].Scalars.size(); + unsigned BundleWidth = VectorizableTree[0]->Scalars.size(); for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) { - TreeEntry &TE = VectorizableTree[I]; + TreeEntry &TE = *VectorizableTree[I].get(); // We create duplicate tree entries for gather sequences that have multiple // uses. However, we should not compute the cost of duplicate sequences. @@ -3165,10 +3183,11 @@ // existing heuristics based on tree size may yield different results. // if (TE.NeedToGather && - std::any_of(std::next(VectorizableTree.begin(), I + 1), - VectorizableTree.end(), [TE](TreeEntry &Entry) { - return Entry.NeedToGather && Entry.isSame(TE.Scalars); - })) + std::any_of( + std::next(VectorizableTree.begin(), I + 1), VectorizableTree.end(), + [TE](const std::unique_ptr &EntryPtr) { + return EntryPtr->NeedToGather && EntryPtr->isSame(TE.Scalars); + })) continue; int C = getEntryCost(&TE); @@ -3195,7 +3214,7 @@ // extend the extracted value back to the original type. Here, we account // for the extract and the added cost of the sign extend if needed. auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth); - auto *ScalarRoot = VectorizableTree[0].Scalars[0]; + auto *ScalarRoot = VectorizableTree[0]->Scalars[0]; if (MinBWs.count(ScalarRoot)) { auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first); auto Extend = @@ -3943,20 +3962,20 @@ } Builder.SetInsertPoint(&F->getEntryBlock().front()); - auto *VectorRoot = vectorizeTree(&VectorizableTree[0]); + auto *VectorRoot = vectorizeTree(VectorizableTree[0].get()); // If the vectorized tree can be rewritten in a smaller type, we truncate the // vectorized root. InstCombine will then rewrite the entire expression. We // sign extend the extracted values below. - auto *ScalarRoot = VectorizableTree[0].Scalars[0]; + auto *ScalarRoot = VectorizableTree[0]->Scalars[0]; if (MinBWs.count(ScalarRoot)) { if (auto *I = dyn_cast(VectorRoot)) Builder.SetInsertPoint(&*++BasicBlock::iterator(I)); - auto BundleWidth = VectorizableTree[0].Scalars.size(); + auto BundleWidth = VectorizableTree[0]->Scalars.size(); auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first); auto *VecTy = VectorType::get(MinTy, BundleWidth); auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy); - VectorizableTree[0].VectorizedValue = Trunc; + VectorizableTree[0]->VectorizedValue = Trunc; } LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() @@ -4052,8 +4071,8 @@ } // For each vectorized value: - for (TreeEntry &EIdx : VectorizableTree) { - TreeEntry *Entry = &EIdx; + for (auto &TEPtr : VectorizableTree) { + TreeEntry *Entry = TEPtr.get(); // No need to handle users of gathered values. if (Entry->NeedToGather) @@ -4086,7 +4105,7 @@ Builder.ClearInsertionPoint(); - return VectorizableTree[0].VectorizedValue; + return VectorizableTree[0]->VectorizedValue; } void BoUpSLP::optimizeGatherSequence() { @@ -4755,7 +4774,7 @@ return; // We only attempt to truncate integer expressions. - auto &TreeRoot = VectorizableTree[0].Scalars; + auto &TreeRoot = VectorizableTree[0]->Scalars; auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); if (!TreeRootIT) return; @@ -4776,8 +4795,8 @@ // Collect the scalar values of the vectorizable expression. We will use this // context to determine which values can be demoted. If we see a truncation, // we mark it as seeding another demotion. - for (auto &Entry : VectorizableTree) - Expr.insert(Entry.Scalars.begin(), Entry.Scalars.end()); + for (auto &EntryPtr : VectorizableTree) + Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end()); // Ensure the roots of the vectorizable tree don't form a cycle. They must // have a single external user that is not in the vectorizable tree.