# Patch summary (free text ahead of the first "diff --git" header).
#
# 1. llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
#    In the loop that fills Mask[I] for I in [OffsetEnd + 1 - Offset, VecSz),
#    the patch adds a guard: when I >= InMask.size(), Mask[I] is set to
#    UndefMaskElem and the iteration continues, so InMask.test(I) is only
#    reached for I < InMask.size().
#    NOTE(review): InMask's type is not visible in this hunk; presumably
#    test() must not be called with an index past size() -- confirm.
#
# 2. llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll (new)
#    Runs `opt -S -passes=slp-vectorizer` on @PR58863, which builds a
#    <3 x float> value with two insertelement instructions (lanes 0 and 2),
#    and FileChecks that the two fmul and two insertelement instructions are
#    still present in the output.
#
# NOTE(review): in both `insertelement <3 x float> , float ...` lines (the
# CHECK line and the input line) the vector operand before the comma appears
# to be missing -- possibly dropped when this patch text was extracted.
# Confirm against the upstream test before applying.
# NOTE(review): this patch arrived with its line breaks and indentation
# collapsed; the line breaks below follow the hunk line counts, but the
# leading whitespace of context lines is a best guess -- verify it before
# running `git apply` / `patch`.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6592,8 +6592,13 @@
                I <= End; ++I)
             if (Mask[I] != UndefMaskElem)
               Mask[I] = I + VecSz;
-          for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
+          for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I) {
+            if (I >= InMask.size()) {
+              Mask[I] = UndefMaskElem;
+              continue;
+            }
             Mask[I] = InMask.test(I) ? UndefMaskElem : I;
+          }
           Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
         }
       }
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/slp-non-pow-2-insertelement.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s
+
+define void @PR58863() {
+; CHECK-LABEL: @PR58863(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[MUL_I:%.*]] = fmul float poison, poison
+; CHECK-NEXT: [[MUL11_I:%.*]] = fmul float poison, poison
+; CHECK-NEXT: [[I:%.*]] = insertelement <3 x float> , float [[MUL_I]], i64 0
+; CHECK-NEXT: [[I1:%.*]] = insertelement <3 x float> [[I]], float [[MUL11_I]], i64 2
+; CHECK-NEXT: ret void
+;
+entry:
+  %mul.i = fmul float poison, poison
+  %mul11.i = fmul float poison, poison
+  %i = insertelement <3 x float> , float %mul.i, i64 0
+  %i1 = insertelement <3 x float> %i, float %mul11.i, i64 2
+  ret void
+}