Index: clang/include/clang/Basic/CodeGenOptions.h =================================================================== --- clang/include/clang/Basic/CodeGenOptions.h +++ clang/include/clang/Basic/CodeGenOptions.h @@ -54,6 +54,7 @@ enum VectorLibrary { NoLibrary, // Don't use any vector library. Accelerate, // Use the Accelerate framework. + LIBMVEC, // GLIBC vector math library. MASSV, // IBM MASS vector library. SVML // Intel short vector math library. }; Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -341,7 +341,7 @@ ENUM_CODEGENOPT(Inlining, InliningMethod, 2, NormalInlining) // Vector functions library to use. -ENUM_CODEGENOPT(VecLib, VectorLibrary, 2, NoLibrary) +ENUM_CODEGENOPT(VecLib, VectorLibrary, 3, NoLibrary) /// The default TLS model to use. ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel) Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1561,7 +1561,7 @@ Group, Flags<[CC1Option]>, HelpText<"Disables an experimental new pass manager in LLVM.">; def fveclib : Joined<["-"], "fveclib=">, Group, Flags<[CC1Option]>, - HelpText<"Use the given vector functions library">, Values<"Accelerate,MASSV,SVML,none">; + HelpText<"Use the given vector functions library">, Values<"Accelerate,LIBMVEC,MASSV,SVML,none">; def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group, Alias, AliasArgs<["none"]>; def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group, Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -371,6 +371,9 @@ case CodeGenOptions::Accelerate: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate); break; + case CodeGenOptions::LIBMVEC: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC); + break; case CodeGenOptions::MASSV: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV); break; Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -749,6 +749,8 @@ StringRef Name = A->getValue(); if (Name == "Accelerate") Opts.setVecLib(CodeGenOptions::Accelerate); + else if (Name == "LIBMVEC") + Opts.setVecLib(CodeGenOptions::LIBMVEC); else if (Name == "MASSV") Opts.setVecLib(CodeGenOptions::MASSV); else if (Name == "SVML") Index: clang/test/Driver/autocomplete.c =================================================================== --- clang/test/Driver/autocomplete.c +++ clang/test/Driver/autocomplete.c @@ -71,6 +71,7 @@ // FLTOALL-NEXT: thin // RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL // FVECLIBALL: Accelerate +// FVECLIBALL-NEXT: LIBMVEC // FVECLIBALL-NEXT: MASSV // FVECLIBALL-NEXT: none // FVECLIBALL-NEXT: SVML Index: llvm/include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -88,6 +88,7 @@ enum VectorLibrary { NoLibrary, // Don't use any vector library. Accelerate, // Use Accelerate framework. + LIBMVEC, // GLIBC Vector Math library. MASSV, // IBM MASS vector library. SVML // Intel short vector math library. }; Index: llvm/include/llvm/Analysis/VecFuncs.def =================================================================== --- llvm/include/llvm/Analysis/VecFuncs.def +++ llvm/include/llvm/Analysis/VecFuncs.def @@ -62,6 +62,87 @@ TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4) +#elif defined(TLI_DEFINE_LIBMVEC_VECFUNCS) +// GLIBC Vector math Functions + +TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2) +TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4) + +TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4) +TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8) + +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2) +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4) + +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8) + +TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2) +TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4) + +TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4) +TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8) + +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2) +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4) + +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8) + +TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2) +TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4) + +TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4) +TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8) + +TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2) +TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4) + +TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4) +TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2) +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4) + +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8) + +TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2) +TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4) + +TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4) +TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8) + +TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2) +TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4) + +TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4) +TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2) +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4) + +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8) + +TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2) +TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4) + +TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4) +TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8) + +TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2) +TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4) + +TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4) +TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2) +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4) + +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4) +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8) + #elif defined(TLI_DEFINE_MASSV_VECFUNCS) // IBM MASS library's vector Functions @@ -347,6 +428,7 @@ #undef TLI_DEFINE_VECFUNC #undef TLI_DEFINE_ACCELERATE_VECFUNCS +#undef TLI_DEFINE_LIBMVEC_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS #undef TLI_DEFINE_SVML_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES Index: llvm/lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetLibraryInfo.cpp +++ llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -24,6 +24,8 @@ "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), + clEnumValN(TargetLibraryInfoImpl::LIBMVEC, "LIBMVEC", + "GLIBC Vector Math library"), clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", "IBM MASS vector library"), clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", @@ -1558,6 +1560,14 @@ addVectorizableFunctions(VecFuncs); break; } + case LIBMVEC: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_LIBMVEC_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } case MASSV: { const VecDesc VecFuncs[] = { #define TLI_DEFINE_MASSV_VECFUNCS Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll @@ -0,0 +1,183 @@ +; RUN: opt -vector-library=LIBMVEC -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare float @__expf_finite(float) #0 + +; CHECK-LABEL: @exp_f32 +; CHECK: <4 x float> @_ZGVbN4v___expf_finite +; CHECK: ret +define void @exp_f32(float* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__expf_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: ; preds = %for.body + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__exp_finite(double) #0 + +; CHECK-LABEL: @exp_f64 +; CHECK: <4 x double> @_ZGVdN4v___exp_finite +; CHECK: ret +define void @exp_f64(double* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__exp_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11 + +for.end: ; preds = %for.body + ret void +} + +!11 = distinct !{!11, !12, !13} +!12 = !{!"llvm.loop.vectorize.width", i32 4} +!13 = !{!"llvm.loop.vectorize.enable", i1 true} + + + + +declare float @__logf_finite(float) #0 + +; CHECK-LABEL: @log_f32 +; CHECK: <4 x float> @_ZGVbN4v___logf_finite +; CHECK: ret +define void @log_f32(float* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__logf_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: ; preds = %for.body + ret void +} + +!21 = distinct !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__log_finite(double) #0 + +; CHECK-LABEL: @log_f64 +; CHECK: <4 x double> @_ZGVdN4v___log_finite +; CHECK: ret +define void @log_f64(double* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: ; preds = %for.body + ret void +} + +!31 = distinct !{!31, !32, !33} +!32 = !{!"llvm.loop.vectorize.width", i32 4} +!33 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare float @__powf_finite(float, float) #0 + +; CHECK-LABEL: @pow_f32 +; CHECK: <4 x float> @_ZGVbN4vv___powf_finite +; CHECK: ret +define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: ; preds = %for.body + ret void +} + +!41 = distinct !{!41, !42, !43} +!42 = !{!"llvm.loop.vectorize.width", i32 4} +!43 = !{!"llvm.loop.vectorize.enable", i1 true} + + +declare double @__pow_finite(double, double) #0 + +; CHECK-LABEL: @pow_f64 +; CHECK: <4 x double> @_ZGVdN4vv___pow_finite +; CHECK: ret +define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %arrayidx = getelementptr inbounds double, double* %exp, i64 %indvars.iv + %tmp1 = load double, double* %arrayidx, align 4 + %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1) + %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %tmp2, double* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: ; preds = %for.body + ret void +} + +!51 = distinct !{!51, !52, !53} +!52 = !{!"llvm.loop.vectorize.width", i32 4} +!53 = !{!"llvm.loop.vectorize.enable", i1 true} + Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll @@ -0,0 +1,325 @@ +; RUN: opt -vector-library=LIBMVEC -inject-tli-mappings -force-vector-width=4 -force-vector-interleave=1 -loop-vectorize -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare double @sin(double) #0 +declare float @sinf(float) #0 +declare double @llvm.sin.f64(double) #0 +declare float @llvm.sin.f32(float) #0 + +declare double @cos(double) #0 +declare float @cosf(float) #0 +declare double @llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 + +define void @sin_f64(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sin_f32(float* nocapture %varray) { +; CHECK-LABEL: @sin_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sin_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sin.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @sin_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @sin_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sin.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f64(double* nocapture %varray) { +; CHECK-LABEL: @cos_f64( +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f32(float* nocapture %varray) { +; CHECK-LABEL: @cos_f32( +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @cosf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @cos_f64_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @cos_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @cos_f32_intrinsic( +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.cos.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @expf(float) #0 + +define void @exp_f32(float* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @expf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; CHECK-LABEL: @exp_f32_intrin +; CHECK: <4 x float> @_ZGVbN4v_expf +; CHECK: ret + +declare float @llvm.exp.f32(float) #0 + +define void @exp_f32_intrin(float* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; CHECK-LABEL: @log_f32 +; CHECK: <4 x float> @_ZGVbN4v_logf +; CHECK: ret + +declare float @logf(float) #0 + +define void @log_f32(float* nocapture %varray) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @logf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; CHECK-LABEL: @pow_f32 +; CHECK: <4 x float> @_ZGVbN4vv_powf +; CHECK: ret + +declare float @powf(float, float) #0 + +define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @powf(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; CHECK-LABEL: @pow_f32_intrin +; CHECK: <4 x float> @_ZGVbN4vv_powf +; CHECK: ret + +declare float @llvm.pow.f32(float, float) #0 + +define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +attributes #0 = { nounwind readnone } Index: llvm/test/Transforms/Util/add-TLI-mappings.ll =================================================================== --- llvm/test/Transforms/Util/add-TLI-mappings.ll +++ llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -3,6 +3,8 @@ ; RUN: opt -vector-library=MASSV -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=Accelerate -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE +; RUN: opt -vector-library=LIBMVEC -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC +; RUN: opt -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC ; RUN: opt -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -21,6 +23,9 @@ ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*) ; ACCELERATE-SAME: [1 x i8*] [ ; ACCELERATE-SAME: i8* bitcast (<4 x float> (<4 x float>)* @vlog10f to i8*) +; LIBMVEC-SAME: [2 x i8*] [ +; LIBMVEC-SAME: i8* bitcast (<2 x double> (<2 x double>)* @_ZGVbN2v_sin to i8*), +; LIBMVEC-SAME: i8* bitcast (<4 x double> (<4 x double>)* @_ZGVdN4v_sin to i8*) ; COMMON-SAME: ], section "llvm.metadata" define double @sin_f64(double %in) { @@ -28,6 +33,7 @@ ; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; ACCELERATE: call double @sin(double %{{.*}}) +; LIBMVEC: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; No mapping of "sin" to a vector function for Accelerate. ; ACCELERATE-NOT: _ZGV_LLVM_{{.*}}_sin({{.*}}) %call = tail call double @sin(double %in) @@ -39,10 +45,12 @@ define float @call_llvm.log10.f32(float %in) { ; COMMON-LABEL: @call_llvm.log10.f32( ; SVML: call float @llvm.log10.f32(float %{{.*}}) +; LIBMVEC: call float @llvm.log10.f32(float %{{.*}}) ; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; No mapping of "llvm.log10.f32" to a vector function for SVML. ; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) +; LIBMVEC-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) %call = tail call float @llvm.log10.f32(float %in) ret float %call } @@ -62,3 +70,7 @@ ; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"= ; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" } + +; LIBMVEC: attributes #[[SIN]] = { "vector-function-abi-variant"= +; LIBMVEC-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin), +; LIBMVEC-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }