Cache the result of BoUpSLP::getVectorElementSize() per instruction in a new
InstrElementSize map, restrict the operand walk to the basic block of the
queried instruction, and clear the cache at the end of vectorizeTree().

NOTE(review): this patch was received with its line breaks collapsed and all
template argument lists missing. Both have been restored here. The inline
sizes of Worklist/Visited, the mapped type of ScalarToTreeEntry, the value
type of InstrElementSize and the isa<> operands in the context lines are
assumed from the surrounding code and must be confirmed against the target
revision before applying.
NOTE(review): when no load is found (or an unhandled instruction is met) the
width of V's own type is cached for every visited instruction, not just V.
Confirm that later queries for those instructions are meant to see that value.

Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -629,7 +629,7 @@
   /// the stored value. Otherwise, the size is the width of the largest loaded
   /// value reaching V. This method is used by the vectorizer to calculate
   /// vectorization factors.
-  unsigned getVectorElementSize(Value *V) const;
+  unsigned getVectorElementSize(Value *V);
 
   /// Compute the minimum type sizes required to represent the entries in a
   /// vectorizable tree.
@@ -1715,6 +1715,9 @@
   /// Maps a specific scalar to its tree entry.
   SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
 
+  /// Maps an instruction to the proposed vectorizable size.
+  SmallDenseMap<Instruction *, unsigned> InstrElementSize;
+
   /// A list of scalars that we found that we need to keep as scalars.
   ValueSet MustGather;
 
@@ -4797,6 +4800,7 @@
   }
 
   Builder.ClearInsertionPoint();
+  InstrElementSize.clear();
 
   return VectorizableTree[0]->VectorizedValue;
 }
@@ -5333,7 +5337,7 @@
   BS->ScheduleStart = nullptr;
 }
 
-unsigned BoUpSLP::getVectorElementSize(Value *V) const {
+unsigned BoUpSLP::getVectorElementSize(Value *V) {
   // If V is a store, just return the width of the stored value without
   // traversing the expression tree. This is the common case.
   if (auto *Store = dyn_cast<StoreInst>(V))
@@ -5343,11 +5347,17 @@
   // that feed it. The type of the loaded value may indicate a more suitable
   // width than V's type. We want to base the vector element size on the width
   // of memory operations where possible.
+  auto *Inst = dyn_cast<Instruction>(V);
   SmallVector<Instruction *, 16> Worklist;
   SmallPtrSet<Instruction *, 16> Visited;
-  if (auto *I = dyn_cast<Instruction>(V)) {
-    Worklist.push_back(I);
-    Visited.insert(I);
+  BasicBlock *P = nullptr;
+  if (Inst) {
+    P = Inst->getParent();
+    auto E = InstrElementSize.find(Inst);
+    if (E != InstrElementSize.end())
+      return E->second;
+    Worklist.push_back(Inst);
+    Visited.insert(Inst);
   }
 
   // Traverse the expression tree in bottom-up order looking for loads. If we
@@ -5375,7 +5385,7 @@
              isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I)) {
       for (Use &U : I->operands())
         if (auto *J = dyn_cast<Instruction>(U.get()))
-          if (Visited.insert(J).second)
+          if (P == J->getParent() && Visited.insert(J).second)
             Worklist.push_back(J);
     }
 
@@ -5384,13 +5394,16 @@
       FoundUnknownInst = true;
   }
 
+  unsigned Width = MaxWidth;
   // If we didn't encounter a memory access in the expression tree, or if we
   // gave up for some reason, just return the width of V.
   if (!MaxWidth || FoundUnknownInst)
-    return DL->getTypeSizeInBits(V->getType());
+    Width = DL->getTypeSizeInBits(V->getType());
+
+  for (Instruction *I : Visited)
+    InstrElementSize[I] = Width;
 
-  // Otherwise, return the maximum width we found.
-  return MaxWidth;
+  return Width;
 }
 
 // Determine if a value V in a vectorizable expression Expr can be demoted to a