Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -393,6 +393,10 @@ /// \returns number of elements in vector if isomorphism exists, 0 otherwise. unsigned canMapToVector(Type *T, const DataLayout &DL) const; + /// \returns True if the VectorizableTree is both tiny and not fully + /// vectorizable. We do not vectorize such trees. + bool isTreeTinyAndNotFullyVectorizable(); + private: struct TreeEntry; @@ -1807,6 +1811,27 @@ return true; } +bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() { + + // We can vectorize the tree if its size is greater than or equal to the + // minimum size specified by the MinTreeSize command line option. + if (VectorizableTree.size() >= MinTreeSize) + return false; + + // If we have a tiny tree (a tree whose size is less than MinTreeSize), we + // can vectorize it if we can prove it fully vectorizable. + if (isFullyVectorizableTinyTree()) + return false; + + // An empty tree must not have recorded any external uses. + assert((!VectorizableTree.empty() || ExternalUses.empty()) && + "We shouldn't have any external users"); + + // Otherwise, we can't vectorize the tree. It is both tiny and not fully + // vectorizable. + return true; +} + int BoUpSLP::getSpillCost() { // Walk from the bottom of the tree to the top, tracking which values are // live. When we see a call instruction that is not part of our tree, @@ -1874,14 +1899,6 @@ DEBUG(dbgs() << "SLP: Calculating cost for tree of size " << VectorizableTree.size() << ".\n"); - // We only vectorize tiny trees if it is fully vectorizable. 
- if (VectorizableTree.size() < MinTreeSize && !isFullyVectorizableTinyTree()) { - if (VectorizableTree.empty()) { - assert(!ExternalUses.size() && "We should not have any external users"); - } - return INT_MAX; - } - unsigned BundleWidth = VectorizableTree[0].Scalars.size(); for (TreeEntry &TE : VectorizableTree) { @@ -3698,6 +3715,9 @@ ArrayRef Operands = Chain.slice(i, VF); R.buildTree(Operands); + if (R.isTreeTinyAndNotFullyVectorizable()) + continue; + R.computeMinimumValueSizes(); int Cost = R.getTreeCost(); @@ -3898,6 +3918,9 @@ Value *ReorderedOps[] = { Ops[1], Ops[0] }; R.buildTree(ReorderedOps, None); } + if (R.isTreeTinyAndNotFullyVectorizable()) + continue; + R.computeMinimumValueSizes(); int Cost = R.getTreeCost(); @@ -4174,7 +4197,10 @@ if (V.shouldReorder()) { SmallVector Reversed(VL.rbegin(), VL.rend()); V.buildTree(Reversed, ReductionOps); - } + } + if (V.isTreeTinyAndNotFullyVectorizable()) + continue; + V.computeMinimumValueSizes(); // Estimate cost. Index: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -slp-vectorizer -S | FileCheck %s --check-prefix=DEFAULT ; RUN: opt < %s -slp-schedule-budget=0 -slp-min-tree-size=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=GATHER +; RUN: opt < %s -slp-schedule-budget=0 -slp-threshold=-30 -slp-vectorizer -S | FileCheck %s --check-prefix=MAX-COST target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" @@ -44,6 +45,9 @@ ; GATHER: %[[R5:.+]] = add <8 x i32> %[[R3]], %[[R4]] ; GATHER: %[[R6:.+]] = extractelement <8 x i32> %[[R5]], i32 0 ; GATHER: %tmp34 = add i32 %[[R6]], %tmp17 +; +; MAX-COST-LABEL: @PR28330( +; MAX-COST-NOT: shufflevector define void @PR28330(i32 %n) { entry: