Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7892,6 +7892,13 @@ } case Instruction::ExtractValue: return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); + case Instruction::Alloca: + // We cannot easily widen alloca to a scalable alloca, as + // the result would need to be a vector of pointers. + // Return an Invalid cost if the VF is Scalable. + if (VF.isScalable()) + return InstructionCost::getInvalid(); + LLVM_FALLTHROUGH; default: // This opcode is unknown. Assume that it is the same as 'mul'. return TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind); Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll @@ -0,0 +1,52 @@ +; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=preferred -pass-remarks-analysis=loop-vectorize < %s 2>%t | FileCheck %s +; RUN: FileCheck %s --check-prefix=CHECK-REMARKS < %t + +; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca +; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store +define void @alloca(i32** %vla, i64 %N) { +; CHECK-LABEL: @alloca( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32, align 16 +; CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32*, i32** [[VLA:%.*]], i64 [[INDUCTION]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32*, i32** [[VLA]], i64 [[INDUCTION1]] +; CHECK-NEXT: store i32* [[TMP0]], i32** [[TMP2]], align 8 +; CHECK-NEXT: store i32* [[TMP1]], i32** [[TMP3]], align 8 +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{.*}}, %middle.block ], [ 0, %entry ] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %for.body ], [ [[BC_RESUME_VAL]], %scalar.ph ] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 16 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32*, i32** [[VLA]], i64 [[IV]] +; CHECK-NEXT: store i32* [[ALLOCA]], i32** [[ARRAYIDX]], align 8 +; CHECK: for.end: +; CHECK-NEXT: call void @foo(i32** nonnull [[VLA]]) +; CHECK-NEXT: ret void +; + +; CHECK-NOT: