diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -164,14 +164,13 @@ "slp-max-look-ahead-depth", cl::init(2), cl::Hidden, cl::desc("The maximum look-ahead depth for operand reordering scores")); -// The maximum depth that the look-ahead score heuristic will explore -// when it probing among candidates for vectorization tree roots. -// The higher this value, the higher the compilation time overhead but unlike -// similar limit for operands ordering this is less frequently used, hence -// impact of higher value is less noticeable. -static cl::opt RootLookAheadMaxDepth( - "slp-max-root-look-ahead-depth", cl::init(2), cl::Hidden, - cl::desc("The maximum look-ahead depth for searching best rooting option")); +// The maximum tree size that will use when probing among candidates for +// vectorization tree roots. The higher this value, the higher the compilation +// time overhead but unlike similar limit for operands ordering this is less +// frequently used, hence impact of higher value is less noticeable. +static cl::opt RootLookAheadMaxSize( + "slp-root-look-ahead-max-size", cl::init(5), cl::Hidden, + cl::desc("The maximum tree size for searching best rooting option")); static cl::opt ViewSLPTree("view-slp-tree", cl::Hidden, @@ -839,6 +838,8 @@ class BoUpSLP { struct TreeEntry; struct ScheduleData; + /// Limit the size of the SLP tree to this many nodes. + Optional MaxTreeSize; public: using ValueList = SmallVector; @@ -893,9 +894,11 @@ InstructionCost getTreeCost(ArrayRef VectorizedVals = None); /// Construct a vectorizable tree that starts at \p Roots, ignoring users for - /// the purpose of scheduling and extraction in the \p UserIgnoreLst. + /// the purpose of scheduling and extraction in the \p UserIgnoreLst. The tree + /// will have at least \p MaxSize nodes if this argument is passed. void buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst = None); + ArrayRef UserIgnoreLst = None, + Optional MaxSize = None); /// Builds external uses of the vectorized scalars, i.e. the list of /// vectorized scalars to be extracted, their lanes and their scalar users. \p @@ -2018,17 +2021,24 @@ /// above the LookAheadHeuristics::ScoreFail. Optional findBestRootPair(ArrayRef> Candidates) { - LookAheadHeuristics LookAhead(*DL, *SE, *this, /*NumLanes=*/2, - RootLookAheadMaxDepth); - int BestScore = LookAheadHeuristics::ScoreFail; + InstructionCost BestCost = InstructionCost::getMax(); Optional Index = None; + for (int I : seq(0, Candidates.size())) { - int Score = LookAhead.getScoreAtLevelRec(Candidates[I].first, - Candidates[I].second, - /*U1=*/nullptr, /*U2=*/nullptr, - /*Level=*/1, None); - if (Score > BestScore) { - BestScore = Score; + SmallVector Roots( + {Candidates[I].first, Candidates[I].second}); + buildTree(Roots, /*UserIgnoreList=*/None, + RootLookAheadMaxSize.getValue()); + if (isTreeTinyAndNotFullyVectorizable()) + continue; + reorderTopToBottom(); + reorderBottomToTop(!isa(Roots.front())); + buildExternalUses(); + + computeMinimumValueSizes(); + InstructionCost Cost = getTreeCost(); + if (Cost < BestCost) { + BestCost = Cost; Index = I; } } @@ -4079,8 +4089,10 @@ } void BoUpSLP::buildTree(ArrayRef Roots, - ArrayRef UserIgnoreLst) { + ArrayRef UserIgnoreLst, + Optional MaxSize) { deleteTree(); + MaxTreeSize = MaxSize; UserIgnoreList = UserIgnoreLst; if (!allSameType(Roots)) return; @@ -4292,6 +4304,15 @@ }; InstructionsState S = getSameOpcode(VL); + + if (MaxTreeSize && VectorizableTree.size() == *MaxTreeSize) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to max tree size " << *MaxTreeSize + << ".\n"); + newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndicies); + return; + } + if (Depth == RecursionMaxDepth) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); if (TryToFindDuplicates(S)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll @@ -61,18 +61,19 @@ define void @root_steering() { ; CHECK-LABEL: @root_steering( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[CHAIN2_2:%.*]] = fadd double 4.000000e-01, 5.000000e-01 -; CHECK-NEXT: [[CHAIN2_1:%.*]] = fmul double 3.000000e-01, [[CHAIN2_2]] -; CHECK-NEXT: [[ROOT5:%.*]] = fadd double 2.000000e-01, [[CHAIN2_1]] ; CHECK-NEXT: [[ROOT3:%.*]] = fmul double 3.000000e-01, 2.000000e-01 ; CHECK-NEXT: [[MUL:%.*]] = fmul double [[ROOT3]], 1.000000e-01 ; CHECK-NEXT: [[CHAINB_3:%.*]] = fadd double 3.000000e-01, 4.000000e-01 ; CHECK-NEXT: [[CHAINB_2:%.*]] = fmul double 2.000000e-01, [[CHAINB_3]] -; CHECK-NEXT: [[CHAINB_1:%.*]] = fadd double 1.000000e-01, [[CHAINB_2]] -; CHECK-NEXT: [[ROOT4:%.*]] = fmul double [[MUL]], [[CHAINB_1]] -; CHECK-NEXT: [[ROOT2:%.*]] = fadd double 1.000000e-01, [[ROOT4]] -; CHECK-NEXT: [[ROOT1:%.*]] = fmul double [[ROOT3]], [[ROOT5]] -; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[ROOT1]], [[ROOT2]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> , double [[CHAINB_2]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> , [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> , double [[MUL]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 +; CHECK-NEXT: [[ROOT1:%.*]] = fmul double [[ROOT3]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1 +; CHECK-NEXT: [[DIV:%.*]] = fdiv double [[ROOT1]], [[TMP6]] ; CHECK-NEXT: [[SEED:%.*]] = fcmp ogt double [[DIV]], 3.000000e-01 ; CHECK-NEXT: ret void ;