diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5899,25 +5899,15 @@
   LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
                     << VL.size() << ".\n");
 
-  // Check that all of the parts are scalar instructions of the same type,
+  // Check that all of the parts are instructions of the same type,
   // we permit an alternate opcode via InstructionsState.
   InstructionsState S = getSameOpcode(VL);
   if (!S.getOpcode())
     return false;
 
   Instruction *I0 = cast<Instruction>(S.OpValue);
-  unsigned Sz = R.getVectorElementSize(I0);
-  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
-  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
-  if (MaxVF < 2) {
-    R.getORE()->emit([&]() {
-      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
-             << "Cannot SLP vectorize list: vectorization factor "
-             << "less than 2 is not supported";
-    });
-    return false;
-  }
-
+  // Make sure invalid types (including vector types) are rejected before
+  // determining the vectorization factor for scalar instructions.
   for (Value *V : VL) {
     Type *Ty = V->getType();
     if (!isValidElementType(Ty)) {
@@ -5935,6 +5925,18 @@
     }
   }
 
+  unsigned Sz = R.getVectorElementSize(I0);
+  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  if (MaxVF < 2) {
+    R.getORE()->emit([&]() {
+      return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
+             << "Cannot SLP vectorize list: vectorization factor "
+             << "less than 2 is not supported";
+    });
+    return false;
+  }
+
   bool Changed = false;
   bool CandidateFound = false;
   int MinCost = SLPCostThreshold;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/invalid_type.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -slp-vectorizer -S -pass-remarks-missed=slp-vectorizer 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test checks that the SLP vectorizer does not try to vectorize instructions that are already vectorized.
+; CHECK: remark: <unknown>:0:0: Cannot SLP vectorize list: type <16 x i8> is unsupported by vectorizer
+
+define void @vector() {
+  %load0 = tail call <16 x i8> @vector.load(<16 x i8>* undef, i32 1)
+  %load1 = tail call <16 x i8> @vector.load(<16 x i8>* undef, i32 2)
+  %add = add <16 x i8> %load1, %load0
+  tail call void @vector.store(<16 x i8> %add, <16 x i8>* undef, i32 1)
+  ret void
+}
+
+declare <16 x i8> @vector.load(<16 x i8>*, i32)
+declare void @vector.store(<16 x i8>, <16 x i8>*, i32)
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-vector.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; This test checks that we do not crash and do not change the code.
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
+; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+; CHECK-NEXT:    ret void
+;
+  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
+  %add = add <vscale x 16 x i8> %load1, %load0
+  tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
+  ret void
+}
+
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)