diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -525,7 +525,6 @@
 TLI_DEFINE_VECFUNC( "llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2))
 
 TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2))
-TLI_DEFINE_VECFUNC( "llvm.sqrt.f64", "_ZGVnN2v_sqrt", FIXED(2))
 
 TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2))
 TLI_DEFINE_VECFUNC( "llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2))
@@ -595,7 +594,6 @@
 TLI_DEFINE_VECFUNC( "llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4))
 
 TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4))
-TLI_DEFINE_VECFUNC( "llvm.sqrt.f32", "_ZGVnN4v_sqrtf", FIXED(4))
 
 TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4))
 TLI_DEFINE_VECFUNC( "llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4))
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sqrt.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib -S < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <2 x double> @llvm_sqrt_f64(<2 x double> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_f64
+; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[IN]])
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %in)
+  ret <2 x double> %1
+}
+
+define <4 x float> @llvm_sqrt_f32(<4 x float> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_f32
+; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[IN]])
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
+  ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_sqrt_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = call fast <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %in)
+  ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_sqrt_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: define {{[^@]+}}@llvm_sqrt_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %1 = call fast <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %in)
+  ret <vscale x 4 x float> %1
+}
+
+
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
@@ -922,6 +922,53 @@
   ret void
 }
 
+
+define void @llvm_sqrt_f64(double* nocapture %varray) {
+  ; CHECK-LABEL: @llvm_sqrt_f64(
+  ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]])
+  ; CHECK: ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @llvm.sqrt.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_sqrt_f32(float* nocapture %varray) {
+  ; CHECK-LABEL: @llvm_sqrt_f32(
+  ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]])
+  ; CHECK: ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @llvm.sqrt.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @tan(double) #0
 declare float @tanf(float) #0
 declare double @llvm.tan.f64(double) #0