Add a hidden LLVM option, -slp-limit-to-reg-size (off by default). When set,
the SLP vectorizer rejects store chains whose VF is below
R.getMaxVecRegSize() / Sz and stops trying value-list slices whose width is
below that same bound. The clang driver forwards the option through -mllvm
when SLP vectorization is enabled and IsUsingLTO is true.

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6441,8 +6441,14 @@
   OptSpecifier SLPVectAliasOption =
       EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize;
   if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption,
-                   options::OPT_fno_slp_vectorize, EnableSLPVec))
+                   options::OPT_fno_slp_vectorize, EnableSLPVec)) {
     CmdArgs.push_back("-vectorize-slp");
+    // Under LTO, also forward the hidden -slp-limit-to-reg-size LLVM option.
+    if (IsUsingLTO) {
+      CmdArgs.push_back("-mllvm");
+      CmdArgs.push_back("-slp-limit-to-reg-size");
+    }
+  }
 
   ParseMPreferVectorWidth(D, Args, CmdArgs);
 
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -172,6 +172,10 @@
     cl::desc("The maximum number of users to visit while visiting the "
              "predecessors. This prevents compilation time increase."));
 
+static cl::opt<bool> SLPLimitToRegSize(
+    "slp-limit-to-reg-size", cl::init(false), cl::Hidden,
+    cl::desc("Try to vectorize using only maximal vector register size."));
+
 static cl::opt<bool>
     ViewSLPTree("view-slp-tree", cl::Hidden,
                 cl::desc("Display the SLP trees with Graphviz"));
@@ -7453,7 +7457,8 @@
   const unsigned MinVF = R.getMinVecRegSize() / Sz;
   unsigned VF = Chain.size();
 
-  if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
+  if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF ||
+      (SLPLimitToRegSize && VF < R.getMaxVecRegSize() / Sz))
     return false;
 
   LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
@@ -7717,6 +7722,8 @@
   Type *ScalarTy = VL[0]->getType();
   if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
     ScalarTy = IE->getOperand(1)->getType();
+  // Smallest OpsWidth still accepted when SLPLimitToRegSize is set.
+  unsigned MaxRegSz = R.getMaxVecRegSize() / Sz;
 
   unsigned NextInst = 0, MaxInst = VL.size();
   for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
@@ -7737,7 +7744,8 @@
       if (!isPowerOf2_32(OpsWidth))
         continue;
 
-      if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
+      if ((SLPLimitToRegSize && OpsWidth < MaxRegSz) ||
+          (VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
         break;
 
       ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);