Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -481,6 +481,8 @@ /// Bottom Up SLP Vectorizer. class BoUpSLP { + struct TreeEntry; + public: using ValueList = SmallVector; using InstrList = SmallVector; @@ -616,8 +618,10 @@ /// (ii) the index of the edge. struct EdgeInfo { EdgeInfo() = default; - /// The index of the user TreeEntry in VectorizableTree. - int Idx = -1; + EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx) + : UserTE(UserTE), EdgeIdx(EdgeIdx) {} + /// The user TreeEntry. + TreeEntry *UserTE = nullptr; /// The operand index of the use. unsigned EdgeIdx = UINT_MAX; #ifndef NDEBUG @@ -628,7 +632,8 @@ } /// Debug print. void dump(raw_ostream &OS) const { - OS << "{User:" << Idx << " EdgeIdx:" << EdgeIdx << "}"; + OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null") + << " EdgeIdx:" << EdgeIdx << "}"; } LLVM_DUMP_METHOD void dump() const { dump(dbgs()); } #endif @@ -1079,8 +1084,6 @@ }; private: - struct TreeEntry; - /// Checks if all users of \p I are the part of the vectorization tree. bool areAllUsersVectorized(Instruction *I) const; @@ -1088,7 +1091,8 @@ int getEntryCost(TreeEntry *E); /// This is the recursive part of buildTree. - void buildTree_rec(ArrayRef Roots, unsigned Depth, EdgeInfo EI); + void buildTree_rec(ArrayRef Roots, unsigned Depth, + const EdgeInfo &EI); /// \returns true if the ExtractElement/ExtractValue instructions in \p VL can /// be vectorized to use the original vector (or aggregate "bitcast" to a @@ -1173,6 +1177,9 @@ /// have multiple users so the data structure is not truly a tree. SmallVector UserTreeIndices; + /// The index of this treeEntry in VectorizableTree. + int Idx = -1; + private: /// The operands of each instruction in each lane Operands[op_index][lane]. /// Note: This helps avoid the replication of the code that performs the @@ -1197,11 +1204,9 @@ void trySetUserTEOperand(const EdgeInfo &UserTreeIdx, ArrayRef OpVL, ArrayRef ReuseShuffleIndices) { - if (UserTreeIdx.Idx >= 0) { - auto &VectorizableTree = Container; - VectorizableTree[UserTreeIdx.Idx]->setOperand(UserTreeIdx.EdgeIdx, OpVL, - ReuseShuffleIndices); - } + if (UserTreeIdx.UserTE) + UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL, + ReuseShuffleIndices); } /// \returns the \p OpIdx operand of this TreeEntry. @@ -1220,6 +1225,7 @@ #ifndef NDEBUG /// Debug printer. LLVM_DUMP_METHOD void dump() const { + dbgs() << Idx << ".\n"; for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) { dbgs() << "Operand " << OpI << ":\n"; for (const Value *V : Operands[OpI]) @@ -1256,12 +1262,12 @@ /// Create a new VectorizableTree entry. TreeEntry *newTreeEntry(ArrayRef VL, bool Vectorized, - EdgeInfo &UserTreeIdx, + const EdgeInfo &UserTreeIdx, ArrayRef ReuseShuffleIndices = None, ArrayRef ReorderIndices = None) { VectorizableTree.push_back(llvm::make_unique(VectorizableTree)); TreeEntry *Last = VectorizableTree.back().get(); - int idx = VectorizableTree.size() - 1; + Last->Idx = VectorizableTree.size() - 1; Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); Last->NeedToGather = !Vectorized; Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), @@ -1270,18 +1276,16 @@ if (Vectorized) { for (int i = 0, e = VL.size(); i != e; ++i) { assert(!getTreeEntry(VL[i]) && "Scalar already in tree!"); - ScalarToTreeEntry[VL[i]] = idx; + ScalarToTreeEntry[VL[i]] = Last->Idx; } } else { MustGather.insert(VL.begin(), VL.end()); } - if (UserTreeIdx.Idx >= 0) + if (UserTreeIdx.UserTE) Last->UserTreeIndices.push_back(UserTreeIdx); Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices); - - UserTreeIdx.Idx = idx; return Last; } @@ -1833,7 +1837,7 @@ ContainerTy &VT) : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {} - NodeRef operator*() { return VectorizableTree[I->Idx].get(); } + NodeRef operator*() { return I->UserTE; } }; static NodeRef getEntryNode(BoUpSLP &R) { @@ -1983,7 +1987,7 @@ } void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, - EdgeInfo UserTreeIdx) { + const EdgeInfo &UserTreeIdx) { assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); InstructionsState S = getSameOpcode(VL); @@ -2140,7 +2144,7 @@ } } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { @@ -2150,8 +2154,7 @@ Operands.push_back(cast(j)->getIncomingValueForBlock( PH->getIncomingBlock(i))); - UserTreeIdx.EdgeIdx = i; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, i}); } return; } @@ -2296,7 +2299,7 @@ return; } } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { @@ -2305,8 +2308,7 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - UserTreeIdx.EdgeIdx = i; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, i}); } return; } @@ -2328,7 +2330,7 @@ } } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; @@ -2350,10 +2352,8 @@ } } - UserTreeIdx.EdgeIdx = 0; - buildTree_rec(Left, Depth + 1, UserTreeIdx); - UserTreeIdx.EdgeIdx = 1; - buildTree_rec(Right, Depth + 1, UserTreeIdx); + buildTree_rec(Left, Depth + 1, {TE, 0}); + buildTree_rec(Right, Depth + 1, {TE, 1}); return; } case Instruction::Select: @@ -2374,8 +2374,8 @@ case Instruction::AShr: case Instruction::And: case Instruction::Or: - case Instruction::Xor: - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + case Instruction::Xor: { + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); // Sort operands of the instructions so that each side is more likely to @@ -2383,10 +2383,8 @@ if (isa(VL0) && VL0->isCommutative()) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); - UserTreeIdx.EdgeIdx = 0; - buildTree_rec(Left, Depth + 1, UserTreeIdx); - UserTreeIdx.EdgeIdx = 1; - buildTree_rec(Right, Depth + 1, UserTreeIdx); + buildTree_rec(Left, Depth + 1, {TE, 0}); + buildTree_rec(Right, Depth + 1, {TE, 1}); return; } @@ -2396,11 +2394,10 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - UserTreeIdx.EdgeIdx = i; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, i}); } return; - + } case Instruction::GetElementPtr: { // We don't combine GEPs with complicated (nested) indexing. for (unsigned j = 0; j < VL.size(); ++j) { @@ -2438,7 +2435,7 @@ } } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); for (unsigned i = 0, e = 2; i < e; ++i) { ValueList Operands; @@ -2446,8 +2443,7 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - UserTreeIdx.EdgeIdx = i; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, i}); } return; } @@ -2461,15 +2457,14 @@ return; } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); ValueList Operands; for (Value *j : VL) Operands.push_back(cast(j)->getOperand(0)); - UserTreeIdx.EdgeIdx = 0; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, 0}); return; } case Instruction::Call: { @@ -2529,7 +2524,7 @@ } } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. @@ -2537,12 +2532,11 @@ CallInst *CI2 = dyn_cast(j); Operands.push_back(CI2->getArgOperand(i)); } - UserTreeIdx.EdgeIdx = i; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, i}); } return; } - case Instruction::ShuffleVector: + case Instruction::ShuffleVector: { // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. if (!S.isAltShuffle()) { @@ -2551,17 +2545,15 @@ LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; } - newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. if (isa(VL0)) { ValueList Left, Right; reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE); - UserTreeIdx.EdgeIdx = 0; - buildTree_rec(Left, Depth + 1, UserTreeIdx); - UserTreeIdx.EdgeIdx = 1; - buildTree_rec(Right, Depth + 1, UserTreeIdx); + buildTree_rec(Left, Depth + 1, {TE, 0}); + buildTree_rec(Right, Depth + 1, {TE, 1}); return; } @@ -2571,11 +2563,10 @@ for (Value *j : VL) Operands.push_back(cast(j)->getOperand(i)); - UserTreeIdx.EdgeIdx = i; - buildTree_rec(Operands, Depth + 1, UserTreeIdx); + buildTree_rec(Operands, Depth + 1, {TE, i}); } return; - + } default: BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);