SLPVectorizer: in getAggregateSize(), return None for an insertelement whose
result type is not a FixedVectorType instead of unconditionally casting it.
Adds an AArch64 test with one fixed-width and one scalable insertelement
chain; per the CHECK lines, the scalable chain is expected to stay scalar
(see the TODO in the test).

NOTE(review): the extracted text of this patch had lost its line breaks and
every angle-bracketed token that begins with a letter (C++ template
arguments, scalable vector types). They are reconstructed below from
context; verify against the upstream revision before applying, in
particular the "vscale x 2" element count, which is inferred by mirroring
the fixed-width <2 x float> test.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7076,8 +7076,14 @@
 } // end anonymous namespace
 
 static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
-  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
-    return cast<FixedVectorType>(IE->getType())->getNumElements();
+  if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
+    auto *FVTy = dyn_cast<FixedVectorType>(IE->getType());
+    // The element count of a scalable vector is unknown at compile time, so
+    // no aggregate size can be reported for it.
+    if (!FVTy)
+      return None;
+    return FVTy->getNumElements();
+  }
 
   unsigned AggregateSize = 1;
   auto *IV = cast<InsertValueInst>(InsertInst);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S 2>%t | FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; WARN-NOT: warning
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define <2 x float> @insertelement-fixed-vector() {
+; CHECK-LABEL: @insertelement-fixed-vector(
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x float> [[I0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[I1]]
+;
+  %f0 = tail call fast float @llvm.fabs.f32(float undef)
+  %f1 = tail call fast float @llvm.fabs.f32(float undef)
+  %i0 = insertelement <2 x float> undef, float %f0, i32 0
+  %i1 = insertelement <2 x float> %i0, float %f1, i32 1
+  ret <2 x float> %i1
+}
+
+; TODO: llvm.fabs could be optimized in vector form. It's legal to extract
+; elements from fixed-length vector and insert into scalable vector.
+define <vscale x 2 x float> @insertelement-scalable-vector() {
+; CHECK-LABEL: @insertelement-scalable-vector(
+; CHECK-NEXT:    [[F0:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
+; CHECK-NEXT:    [[F1:%.*]] = tail call fast float @llvm.fabs.f32(float undef)
+; CHECK-NEXT:    [[I0:%.*]] = insertelement <vscale x 2 x float> undef, float [[F0]], i32 0
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <vscale x 2 x float> [[I0]], float [[F1]], i32 1
+; CHECK-NEXT:    ret <vscale x 2 x float> [[I1]]
+;
+  %f0 = tail call fast float @llvm.fabs.f32(float undef)
+  %f1 = tail call fast float @llvm.fabs.f32(float undef)
+  %i0 = insertelement <vscale x 2 x float> undef, float %f0, i32 0
+  %i1 = insertelement <vscale x 2 x float> %i0, float %f1, i32 1
+  ret <vscale x 2 x float> %i1
+}
+
+; Function Attrs: nounwind readnone speculatable willreturn
+declare float @llvm.fabs.f32(float)