diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll @@ -1,14 +1,12 @@ ; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail. -; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,NEON -; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,SVE +; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON +; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" declare double @acos(double) #0 declare float @acosf(float) #0 -declare double @llvm.acos.f64(double) #0 -declare float @llvm.acos.f32(float) #0 define void @acos_f64(double* nocapture %varray) { ; CHECK-LABEL: @acos_f64( @@ -60,8 +58,6 @@ declare double @asin(double) #0 declare float @asinf(float) #0 -declare double @llvm.asin.f64(double) #0 -declare float @llvm.asin.f32(float) #0 define void @asin_f64(double* nocapture %varray) { ; CHECK-LABEL: @asin_f64( @@ -113,8 +109,6 @@ declare double @atan(double) #0 declare float @atanf(float) #0 -declare double @llvm.atan.f64(double) #0 -declare float @llvm.atan.f32(float) #0 define void @atan_f64(double* nocapture %varray) { ; CHECK-LABEL: @atan_f64( @@ -166,8 +160,6 @@ declare double @atan2(double, double) #0 declare float @atan2f(float, float) #0 
-declare double @llvm.atan2.f64(double, double) #0 -declare float @llvm.atan2.f32(float, float) #0 define void @atan2_f64(double* nocapture %varray) { ; CHECK-LABEL: @atan2_f64( @@ -219,8 +211,6 @@ declare double @atanh(double) #0 declare float @atanhf(float) #0 -declare double @llvm.atanh.f64(double) #0 -declare float @llvm.atanh.f32(float) #0 define void @atanh_f64(double* nocapture %varray) { ; CHECK-LABEL: @atanh_f64( @@ -272,8 +262,6 @@ declare double @cos(double) #0 declare float @cosf(float) #0 -declare double @llvm.cos.f64(double) #0 -declare float @llvm.cos.f32(float) #0 define void @cos_f64(double* nocapture %varray) { ; CHECK-LABEL: @cos_f64( @@ -325,8 +313,6 @@ declare double @cosh(double) #0 declare float @coshf(float) #0 -declare double @llvm.cosh.f64(double) #0 -declare float @llvm.cosh.f32(float) #0 define void @cosh_f64(double* nocapture %varray) { ; CHECK-LABEL: @cosh_f64( @@ -378,8 +364,6 @@ declare double @exp(double) #0 declare float @expf(float) #0 -declare double @llvm.exp.f64(double) #0 -declare float @llvm.exp.f32(float) #0 define void @exp_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp_f64( @@ -431,8 +415,6 @@ declare double @exp2(double) #0 declare float @exp2f(float) #0 -declare double @llvm.exp2.f64(double) #0 -declare float @llvm.exp2.f32(float) #0 define void @exp2_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp2_f64( @@ -484,8 +466,6 @@ declare double @exp10(double) #0 declare float @exp10f(float) #0 -declare double @llvm.exp10.f64(double) #0 -declare float @llvm.exp10.f32(float) #0 define void @exp10_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp10_f64( @@ -586,8 +566,6 @@ declare double @lgamma(double) #0 declare float @lgammaf(float) #0 -declare double @llvm.lgamma.f64(double) #0 -declare float @llvm.lgamma.f32(float) #0 define void @lgamma_f64(double* nocapture %varray) { ; CHECK-LABEL: @lgamma_f64( @@ -639,8 +617,6 @@ declare double @log10(double) #0 declare float @log10f(float) #0 -declare double 
@llvm.log10.f64(double) #0 -declare float @llvm.log10.f32(float) #0 define void @log10_f64(double* nocapture %varray) { ; CHECK-LABEL: @log10_f64( @@ -692,8 +668,6 @@ declare double @log2(double) #0 declare float @log2f(float) #0 -declare double @llvm.log2.f64(double) #0 -declare float @llvm.log2.f32(float) #0 define void @log2_f64(double* nocapture %varray) { ; CHECK-LABEL: @log2_f64( @@ -745,8 +719,6 @@ declare double @log(double) #0 declare float @logf(float) #0 -declare double @llvm.log.f64(double) #0 -declare float @llvm.log.f32(float) #0 define void @log_f64(double* nocapture %varray) { ; CHECK-LABEL: @log_f64( @@ -798,8 +770,6 @@ declare double @pow(double, double) #0 declare float @powf(float, float) #0 -declare double @llvm.pow.f64(double, double) #0 -declare float @llvm.pow.f32(float, float) #0 define void @pow_f64(double* nocapture %varray) { ; CHECK-LABEL: @pow_f64( @@ -851,8 +821,6 @@ declare double @sin(double) #0 declare float @sinf(float) #0 -declare double @llvm.sin.f64(double) #0 -declare float @llvm.sin.f32(float) #0 define void @sin_f64(double* nocapture %varray) { ; CHECK-LABEL: @sin_f64( @@ -904,8 +872,6 @@ declare double @sinh(double) #0 declare float @sinhf(float) #0 -declare double @llvm.sinh.f64(double) #0 -declare float @llvm.sinh.f32(float) #0 define void @sinh_f64(double* nocapture %varray) { ; CHECK-LABEL: @sinh_f64( @@ -957,8 +923,6 @@ declare double @sqrt(double) #0 declare float @sqrtf(float) #0 -declare double @llvm.sqrt.f64(double) #0 -declare float @llvm.sqrt.f32(float) #0 define void @sqrt_f64(double* nocapture %varray) { ; CHECK-LABEL: @sqrt_f64( @@ -1008,58 +972,8 @@ ret void } -define void @llvm_sqrt_f64(double* nocapture %varray) { - ; CHECK-LABEL: @llvm_sqrt_f64( - ; NEON: [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call fast @llvm.sqrt.nxv2f64( [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ 
%iv.next, %for.body ] - %tmp = trunc i64 %iv to i32 - %conv = sitofp i32 %tmp to double - %call = tail call fast double @llvm.sqrt.f64(double %conv) - %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv - store double %call, double* %arrayidx, align 8 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - -define void @llvm_sqrt_f32(float* nocapture %varray) { - ; CHECK-LABEL: @llvm_sqrt_f32( - ; NEON: [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call fast @llvm.sqrt.nxv4f32( [[TMP4:%.*]]) - ; CHECK: ret void - ; - entry: - br label %for.body - - for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %tmp = trunc i64 %iv to i32 - %conv = sitofp i32 %tmp to float - %call = tail call fast float @llvm.sqrt.f32(float %conv) - %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv - store float %call, float* %arrayidx, align 4 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body - - for.end: - ret void -} - declare double @tan(double) #0 declare float @tanf(float) #0 -declare double @llvm.tan.f64(double) #0 -declare float @llvm.tan.f32(float) #0 define void @tan_f64(double* nocapture %varray) { ; CHECK-LABEL: @tan_f64( @@ -1111,8 +1025,6 @@ declare double @tanh(double) #0 declare float @tanhf(float) #0 -declare double @llvm.tanh.f64(double) #0 -declare float @llvm.tanh.f32(float) #0 define void @tanh_f64(double* nocapture %varray) { ; CHECK-LABEL: @tanh_f64( @@ -1164,8 +1076,6 @@ declare double @tgamma(double) #0 declare float @tgammaf(float) #0 -declare double @llvm.tgamma.f64(double) #0 -declare float @llvm.tgamma.f32(float) #0 define void @tgamma_f64(double* nocapture %varray) { ; CHECK-LABEL: @tgamma_f64( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll 
b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll @@ -0,0 +1,1032 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail. +; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON +; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + + +; Tests are checking if LV can vectorize loops with llvm math intrinsics using mappings +; from TLI (if such mappings exist) for scalable and fixed width vectors. + +declare double @llvm.ceil.f64(double) #0 +declare float @llvm.ceil.f32(float) #0 + +define void @llvm_ceil_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_ceil_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.ceil.nxv2f64( [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.ceil.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_ceil_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_ceil_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP3:%.*]]) +; SVE: 
[[TMP19:%.*]] = call @llvm.ceil.nxv4f32( [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.ceil.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.copysign.f64(double, double) #0 +declare float @llvm.copysign.f32(float, float) #0 + +define void @llvm_copysign_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_copysign_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[TMP3:%.*]], <2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.copysign.nxv2f64( [[TMP17:%.*]], [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.copysign.f64(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_copysign_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_copysign_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[TMP3:%.*]], <4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.copysign.nxv4f32( [[TMP17:%.*]], [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 
%tmp to float + %call = tail call float @llvm.copysign.f32(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 + +define void @llvm_cos_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_cos_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_cos( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_cos_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_cos_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_cosf( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.cos.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + 
%exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.exp.f64(double) #0 +declare float @llvm.exp.f32(float) #0 + +define void @llvm_exp_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_exp_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_exp( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.exp.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_exp_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_exp_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_expf( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.exp2.f64(double) #0 +declare float @llvm.exp2.f32(float) #0 + +define void @llvm_exp2_f64(double* 
nocapture %varray) { +; CHECK-LABEL: define void @llvm_exp2_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_exp2( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.exp2.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_exp2_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_exp2_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_exp2f( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp2.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.fabs.f64(double) #0 +declare float @llvm.fabs.f32(float) #0 + +define void @llvm_fabs_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_fabs_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.fabs.nxv2f64( [[TMP17:%.*]]) +; CHECK: ret 
void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.fabs.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + + +define void @llvm_fabs_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_fabs_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.fabs.nxv4f32( [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.fabs.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.floor.f64(double) #0 +declare float @llvm.floor.f32(float) #0 + +define void @llvm_floor_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_floor_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.floor.nxv2f64( [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.floor.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 
%iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_floor_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_floor_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.floor.nxv4f32( [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.floor.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.fma.f64(double, double, double) #0 +declare float @llvm.fma.f32(float, float, float) #0 + +define void @llvm_fma_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_fma_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP3:%.*]], <2 x double> [[TMP3:%.*]], <2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.fma.nxv2f64( [[TMP17:%.*]], [[TMP17:%.*]], [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.fma.f64(double %conv, double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_fma_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_fma_f32 +; NEON: [[TMP5:%.*]] 
= call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP3:%.*]], <4 x float> [[TMP3:%.*]], <4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.fma.nxv4f32( [[TMP17:%.*]], [[TMP17:%.*]], [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.fma.f32(float %conv, float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.log.f64(double) #0 +declare float @llvm.log.f32(float) #0 + +define void @llvm_log_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_log_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_log( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_log_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_log_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_logf( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: 
ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.log10.f64(double) #0 +declare float @llvm.log10.f32(float) #0 + +define void @llvm_log10_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_log10_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_log10( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log10.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_log10_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_log10_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_log10f( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float 
@llvm.log10.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.log2.f64(double) #0 +declare float @llvm.log2.f32(float) #0 + +define void @llvm_log2_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_log2_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_log2( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log2.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_log2_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_log2_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @_ZGVsMxv_log2f( [[TMP17:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log2.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 
%exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.maxnum.f64(double, double) #0 +declare float @llvm.maxnum.f32(float, float) #0 + +define void @llvm_maxnum_f64(double* nocapture %varray) { +; CHECK-LABEL: define void @llvm_maxnum_f64 +; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[TMP3:%.*]], <2 x double> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.maxnum.nxv2f64( [[TMP17:%.*]], [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.maxnum.f64(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @llvm_maxnum_f32(float* nocapture %varray) { +; CHECK-LABEL: define void @llvm_maxnum_f32 +; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP3:%.*]], <4 x float> [[TMP3:%.*]]) +; SVE: [[TMP19:%.*]] = call @llvm.maxnum.nxv4f32( [[TMP17:%.*]], [[TMP17:%.*]]) +; CHECK: ret void +; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.maxnum.f32(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @llvm.minnum.f64(double, double) #0 +declare float @llvm.minnum.f32(float, float) #0 + +define void @llvm_minnum_f64(double* nocapture %varray) { +; 
CHECK-LABEL: define void @llvm_minnum_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[TMP3:%.*]], <2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.minnum.f64(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_minnum_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_minnum_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP3:%.*]], <4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.minnum.f32(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.nearbyint.f64(double) #0
+declare float @llvm.nearbyint.f32(float) #0
+
+define void @llvm_nearbyint_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_nearbyint_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.nearbyint.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_nearbyint_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_nearbyint_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.nearbyint.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.pow.f64(double, double) #0
+declare float @llvm.pow.f32(float, float) #0
+
+define void @llvm_pow_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_pow_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP3:%.*]], <2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.pow.f64(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_pow_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_pow_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP3:%.*]], <4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.pow.f32(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.rint.f64(double) #0
+declare float @llvm.rint.f32(float) #0
+
+define void @llvm_rint_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_rint_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.rint.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_rint_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_rint_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.rint.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.round.f64(double) #0
+declare float @llvm.round.f32(float) #0
+
+define void @llvm_round_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_round_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.round.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_round_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_round_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.round.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.sin.f64(double) #0
+declare float @llvm.sin.f32(float) #0
+
+define void @llvm_sin_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_sin_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[TMP17:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sin.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_sin_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_sin_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[TMP17:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sin.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.sqrt.f64(double) #0
+declare float @llvm.sqrt.f32(float) #0
+
+define void @llvm_sqrt_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_sqrt_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sqrt.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_sqrt_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_sqrt_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sqrt.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+declare double @llvm.trunc.f64(double) #0
+declare float @llvm.trunc.f32(float) #0
+
+define void @llvm_trunc_f64(double* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_trunc_f64
+; NEON: [[TMP5:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.trunc.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @llvm_trunc_f32(float* nocapture %varray) {
+; CHECK-LABEL: define void @llvm_trunc_f32
+; NEON: [[TMP5:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP3:%.*]])
+; SVE: [[TMP19:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[TMP17:%.*]])
+; CHECK: ret void
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.trunc.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}