diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -269,6 +269,54 @@
 TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
 TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
 
+TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
+
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16)
+
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
 TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
@@ -300,3 +300,117 @@
 !91 = distinct !{!91, !92, !93}
 !92 = !{!"llvm.loop.vectorize.width", i32 4}
 !93 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+declare float @__log10f_finite(float) #0
+
+; CHECK-LABEL: @log10_f32
+; CHECK: <4 x float> @__svml_log10f4
+; CHECK: ret
+define void @log10_f32(float* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__log10f_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 
1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!101 = distinct !{!101, !102, !103}
+!102 = !{!"llvm.loop.vectorize.width", i32 4}
+!103 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+declare double @__log10_finite(double) #0
+
+; CHECK-LABEL: @log10_f64
+; CHECK: <4 x double> @__svml_log104
+; CHECK: ret
+define void @log10_f64(double* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @__log10_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
+  store double %call, double* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!111 = distinct !{!111, !112, !113}
+!112 = !{!"llvm.loop.vectorize.width", i32 4}
+!113 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+declare float @__sqrtf_finite(float) #0
+
+; CHECK-LABEL: @sqrt_f32
+; CHECK: <4 x float> @__svml_sqrtf4
+; CHECK: ret
+define void @sqrt_f32(float* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__sqrtf_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!121 = distinct !{!121, !122, !123}
+!122 = !{!"llvm.loop.vectorize.width", i32 4}
+!123 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+declare double @__sqrt_finite(double) #0
+
+; CHECK-LABEL: @sqrt_f64
+; CHECK: <4 x double> @__svml_sqrt4
+; CHECK: ret
+define void @sqrt_f64(double* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @__sqrt_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
+  store double %call, double* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!131 = distinct !{!131, !132, !133}
+!132 = !{!"llvm.loop.vectorize.width", i32 4}
+!133 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -33,6 +33,16 @@
 declare double @llvm.log2.f64(double) #0
 declare float @llvm.log2.f32(float) #0
 
+declare double @log10(double) #0
+declare float @log10f(float) #0
+declare double @llvm.log10.f64(double) #0
+declare float @llvm.log10.f32(float) #0
+
+declare double @sqrt(double) #0
+declare float @sqrtf(float) #0 +declare double @llvm.sqrt.f64(double) #0 +declare float @llvm.sqrt.f32(float) #0 + declare double @exp2(double) #0 declare float @exp2f(float) #0 declare double @llvm.exp2.f64(double) #0 @@ -598,6 +608,190 @@ ret void } +define void @log10_f64(double* nocapture %varray) { +; CHECK-LABEL: @log10_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log10(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log10_f32(float* nocapture %varray) { +; CHECK-LABEL: @log10_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log10f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log10_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @log10_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log10.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @log10_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @log10_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.log10.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sqrt_f64(double* nocapture %varray) { +; CHECK-LABEL: @sqrt_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sqrt(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = 
icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sqrt_f32(float* nocapture %varray) { +; CHECK-LABEL: @sqrt_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sqrtf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sqrt_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @sqrt_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sqrt.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sqrt_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @sqrt_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sqrt.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @exp2_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp2_f64( ; CHECK: [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]]) diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -9,10 +9,13 @@ target triple = "x86_64-unknown-linux-gnu" ; COMMON-LABEL: @llvm.compiler.used = appending global -; SVML-SAME: [3 x i8*] [ +; SVML-SAME: [6 x i8*] [ ; SVML-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*), ; SVML-SAME: i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*), -; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*) +; SVML-SAME: i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*), +; SVML-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*), +; SVML-SAME: i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*), +; SVML-SAME: i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*) ; MASSV-SAME: [2 x i8*] [ ; MASSV-SAME: i8* bitcast (<2 x double> (<2 x double>)* @__sind2_massv to i8*), ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*)