NOTE(review): this patch was recovered from a copy in which all line breaks
were collapsed and every '<...>' span had been stripped. Template arguments
and <vscale x 2 x ...> types were restored from the surrounding code and the
loop metadata (vectorize.width = 2, scalable.enable = true). Indentation,
blank context lines and the SmallVector inline size were reconstructed to
satisfy the hunk line counts -- verify against the target tree before applying.

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -82,7 +82,6 @@
                                                  ElementCount Factor)
     : RetTy(CI.getType()), IID(Id), VF(Factor) {
 
-  assert(!Factor.isScalable() && "Scalable vectors are not yet supported");
   if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
     FMF = FPMO->getFastMathFlags();
 
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3753,7 +3753,6 @@
 InstructionCost
 LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF,
                                               bool &NeedToScalarize) {
-  assert(!VF.isScalable() && "scalable vectors not yet supported.");
   Function *F = CI->getCalledFunction();
   Type *ScalarRetTy = CI->getType();
   SmallVector<Type *, 4> Tys, ScalarTys;
@@ -4967,10 +4966,8 @@
     if (UseVectorIntrinsic) {
       // Use vector version of the intrinsic.
       Type *TysForDecl[] = {CI->getType()};
-      if (VF.isVector()) {
-        assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+      if (VF.isVector())
         TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
-      }
       VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
       assert(VectorF && "Can't retrieve vector intrinsic.");
     } else {
@@ -7042,8 +7039,9 @@
 LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
                                                      ElementCount VF) {
 
-  assert(!VF.isScalable() &&
-         "cannot compute scalarization overhead for scalable vectorization");
+  if (VF.isScalable())
+    return InstructionCost::getInvalid();
+
   if (VF.isScalar())
     return 0;
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -0,0 +1,111 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
+; CHECK-LABEL: @vec_load
+; CHECK: vector.body:
+; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
+; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])
+entry:
+  %cmp7 = icmp sgt i64 %N, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %b, i64 %iv
+  %0 = load double, double* %arrayidx, align 8
+  %1 = call double @foo(double %0) #0
+  %add = fadd double %1, 1.000000e+00
+  %arrayidx2 = getelementptr inbounds double, double* %a, i64 %iv
+  store double %add, double* %arrayidx2, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @vec_scalar(i64 %N, double* nocapture %a) {
+; CHECK-LABEL: @vec_scalar
+; CHECK: vector.body:
+; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 1.000000e+01, i32 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer))
+entry:
+  %cmp7 = icmp sgt i64 %N, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %0 = call double @foo(double 10.0) #0
+  %sub = fsub double %0, 1.000000e+00
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
+  store double %sub, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+define void @vec_ptr(i64 %N, i64* noalias %a, i64** readnone %b) {
+; CHECK-LABEL: @vec_ptr
+; CHECK: vector.body:
+; CHECK: %[[LOAD:.*]] = load <vscale x 2 x i64*>, <vscale x 2 x i64*>*
+; CHECK: call <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*> %[[LOAD]])
+entry:
+  %cmp7 = icmp sgt i64 %N, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep = getelementptr i64*, i64** %b, i64 %iv
+  %load = load i64*, i64** %gep
+  %call = call i64 @bar(i64* %load) #1
+  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
+  store i64 %call, i64* %arrayidx
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+  ret void
+}
+
+define void @vec_intrinsic(i64 %N, double* nocapture readonly %a) {
+; CHECK-LABEL: @vec_intrinsic
+; CHECK: vector.body:
+; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
+; CHECK: call fast <vscale x 2 x double> @sin_vec(<vscale x 2 x double> %[[LOAD]])
+entry:
+  %cmp7 = icmp sgt i64 %N, 0
+  br i1 %cmp7, label %for.body, label %for.end
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %iv
+  %0 = load double, double* %arrayidx, align 8
+  %1 = call fast double @llvm.sin.f64(double %0) #2
+  %add = fadd fast double %1, 1.000000e+00
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+  ret void
+}
+
+declare double @foo(double)
+declare i64 @bar(i64*)
+declare double @llvm.sin.f64(double)
+
+declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)
+declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)
+declare <vscale x 2 x double> @sin_vec(<vscale x 2 x double>)
+
+attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_foo(foo_vec)" }
+attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_bar(bar_vec)" }
+attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec)" }
+
+!1 = distinct !{!1, !2, !3}
+!2 = !{!"llvm.loop.vectorize.width", i32 2}
+!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}