# Patch summary (free-form preamble; text before the first "diff --git" header is commentary, not patch content).
#
# llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
#   * Adds the const helper getMinVF(Sz), which returns std::max(2U, getMinVecRegSize() / Sz),
#     so its result is never smaller than 2.
#     NOTE(review): Sz is used as a divisor; presumably callers never pass 0 -- confirm.
#   * Hunks at -7448 and -7559: the inline expression std::max(2U, R.getMinVecRegSize() / <size>)
#     is replaced by R.getMinVF(<size>). The computed MinVF is unchanged at these two sites.
#   * Hunk at -4187: the loop's lower bound changes from getMinVecRegSize() / (2 * Sz), which had no
#     lower clamp, to getMinVF(2 * Sz). This is the only hunk that changes a computed value: MinVF in
#     that loop is now at least 2.
#     NOTE(review): the old expression yields 0 or 1 whenever getMinVecRegSize() < 4 * Sz, and the loop
#     condition "VF >= MinVF" with "VF /= 2" on unsigned values is always true when MinVF is 0.
#     Presumably that is the case the new test targets -- confirm against the commit message.
#
# llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll (new file)
#   * Runs "opt -S -slp-vectorizer" with -mtriple=aarch64-unknown-linux-gnu on @foo: four volatile
#     double loads in the entry block feed four double phis in for.body, which branches back to itself.
#   * The autogenerated CHECK lines expect the same four scalar loads and four scalar phis in the output.
#
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -734,6 +734,10 @@ return MinVecRegSize; } + unsigned getMinVF(unsigned Sz) const { + return std::max(2U, getMinVecRegSize() / Sz); + } + unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { unsigned MaxVF = MaxVFOption.getNumOccurrences() ? MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode); @@ -4187,8 +4191,7 @@ unsigned VectorizedCnt = 0; unsigned ScatterVectorizeCnt = 0; const unsigned Sz = DL->getTypeSizeInBits(E->getMainOp()->getType()); - for (unsigned MinVF = getMinVecRegSize() / (2 * Sz); VF >= MinVF; - VF /= 2) { + for (unsigned MinVF = getMinVF(2 * Sz); VF >= MinVF; VF /= 2) { for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End; Cnt += VF) { ArrayRef Slice = VL.slice(Cnt, VF); @@ -7448,7 +7451,7 @@ unsigned EltSize = R.getVectorElementSize(Operands[0]); unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize); - unsigned MinVF = std::max(2U, R.getMinVecRegSize() / EltSize); + unsigned MinVF = R.getMinVF(EltSize); unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store), MaxElts); @@ -7559,7 +7562,7 @@ } unsigned Sz = R.getVectorElementSize(I0); - unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz); + unsigned MinVF = R.getMinVF(Sz); unsigned MaxVF = std::max(PowerOf2Floor(VL.size()), MinVF); MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF); if (MaxVF < 2) { diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-load-min-required-vf-2.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -slp-vectorizer 
-mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s + +define void @foo() local_unnamed_addr { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load volatile double, double* poison, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load volatile double, double* poison, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load volatile double, double* poison, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load volatile double, double* poison, align 8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[D30_0734:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[D01_0733:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP1]], [[ENTRY]] ] +; CHECK-NEXT: [[D11_0732:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP2]], [[ENTRY]] ] +; CHECK-NEXT: [[D21_0731:%.*]] = phi double [ undef, [[FOR_BODY]] ], [ [[TMP3]], [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY]] +; +entry: + %0 = load volatile double, double* poison, align 8 + %1 = load volatile double, double* poison, align 8 + %2 = load volatile double, double* poison, align 8 + %3 = load volatile double, double* poison, align 8 + br label %for.body + +for.body: ; preds = %for.body, %entry + %d30.0734 = phi double [ undef, %for.body ], [ %0, %entry ] + %d01.0733 = phi double [ undef, %for.body ], [ %1, %entry ] + %d11.0732 = phi double [ undef, %for.body ], [ %2, %entry ] + %d21.0731 = phi double [ undef, %for.body ], [ %3, %entry ] + br label %for.body +}