Index: clang/include/clang/Basic/CodeGenOptions.h =================================================================== --- clang/include/clang/Basic/CodeGenOptions.h +++ clang/include/clang/Basic/CodeGenOptions.h @@ -54,11 +54,11 @@ enum VectorLibrary { NoLibrary, // Don't use any vector library. Accelerate, // Use the Accelerate framework. + LIBMVEC, // GLIBC vector math library. MASSV, // IBM MASS vector library. SVML // Intel short vector math library. }; - enum ObjCDispatchMethodKind { Legacy = 0, NonLegacy = 1, Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -349,7 +349,7 @@ ENUM_CODEGENOPT(Inlining, InliningMethod, 2, NormalInlining) // Vector functions library to use. -ENUM_CODEGENOPT(VecLib, VectorLibrary, 2, NoLibrary) +ENUM_CODEGENOPT(VecLib, VectorLibrary, 3, NoLibrary) /// The default TLS model to use. ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel) Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1579,7 +1579,7 @@ Group, Flags<[CC1Option]>, HelpText<"Disables an experimental new pass manager in LLVM.">; def fveclib : Joined<["-"], "fveclib=">, Group, Flags<[CC1Option]>, - HelpText<"Use the given vector functions library">, Values<"Accelerate,MASSV,SVML,none">; + HelpText<"Use the given vector functions library">, Values<"Accelerate,libmvec,MASSV,SVML,none">; def fno_lax_vector_conversions : Flag<["-"], "fno-lax-vector-conversions">, Group, Alias, AliasArgs<["none"]>; def fno_merge_all_constants : Flag<["-"], "fno-merge-all-constants">, Group, Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -371,6 +371,16 @@ case CodeGenOptions::Accelerate: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate); break; + case CodeGenOptions::LIBMVEC: + switch(TargetTriple.getArch()) { + default: + break; + case llvm::Triple::x86_64: + TLII->addVectorizableFunctionsFromVecLib + (TargetLibraryInfoImpl::LIBMVEC_X86); + break; + } + break; case CodeGenOptions::MASSV: TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV); break; Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -749,6 +749,8 @@ StringRef Name = A->getValue(); if (Name == "Accelerate") Opts.setVecLib(CodeGenOptions::Accelerate); + else if (Name == "libmvec") + Opts.setVecLib(CodeGenOptions::LIBMVEC); else if (Name == "MASSV") Opts.setVecLib(CodeGenOptions::MASSV); else if (Name == "SVML") Index: clang/test/Driver/autocomplete.c =================================================================== --- clang/test/Driver/autocomplete.c +++ clang/test/Driver/autocomplete.c @@ -73,6 +73,7 @@ // FLTOALL-NEXT: thin // RUN: %clang --autocomplete=-fveclib= | FileCheck %s -check-prefix=FVECLIBALL // FVECLIBALL: Accelerate +// FVECLIBALL-NEXT: libmvec // FVECLIBALL-NEXT: MASSV // FVECLIBALL-NEXT: none // FVECLIBALL-NEXT: SVML Index: clang/test/Driver/fveclib.c =================================================================== --- clang/test/Driver/fveclib.c +++ clang/test/Driver/fveclib.c @@ -1,10 +1,12 @@ // RUN: %clang -### -c -fveclib=none %s 2>&1 | FileCheck -check-prefix CHECK-NOLIB %s // RUN: %clang -### -c -fveclib=Accelerate %s 2>&1 | FileCheck -check-prefix CHECK-ACCELERATE %s +// RUN: %clang -### -c -fveclib=libmvec %s 2>&1 | FileCheck -check-prefix CHECK-libmvec %s // RUN: %clang -### -c -fveclib=MASSV %s 2>&1 | FileCheck -check-prefix CHECK-MASSV %s // RUN: not %clang -c -fveclib=something %s 2>&1 | FileCheck -check-prefix CHECK-INVALID %s // CHECK-NOLIB: "-fveclib=none" // CHECK-ACCELERATE: "-fveclib=Accelerate" +// CHECK-libmvec: "-fveclib=libmvec" // CHECK-MASSV: "-fveclib=MASSV" // CHECK-INVALID: error: invalid value 'something' in '-fveclib=something' Index: llvm/include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -88,6 +88,7 @@ enum VectorLibrary { NoLibrary, // Don't use any vector library. Accelerate, // Use Accelerate framework. + LIBMVEC_X86,// GLIBC Vector Math library. MASSV, // IBM MASS vector library. SVML // Intel short vector math library. }; Index: llvm/include/llvm/Analysis/VecFuncs.def =================================================================== --- llvm/include/llvm/Analysis/VecFuncs.def +++ llvm/include/llvm/Analysis/VecFuncs.def @@ -62,6 +62,87 @@ TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4) +#elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS) +// GLIBC Vector math Functions + +TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2) +TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4) + +TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4) +TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8) + +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2) +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4) + +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8) + +TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2) +TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4) + +TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4) +TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8) + +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2) +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4) + +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8) + +TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2) +TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4) + +TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4) +TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8) + +TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2) +TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4) + +TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4) +TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2) +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4) + +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8) + +TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2) +TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4) + +TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4) +TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8) + +TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2) +TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4) + +TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4) +TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2) +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4) + +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8) + +TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2) +TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4) + +TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4) +TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8) + +TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2) +TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4) + +TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4) +TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2) +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4) + +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4) +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8) + #elif defined(TLI_DEFINE_MASSV_VECFUNCS) // IBM MASS library's vector Functions @@ -339,6 +420,7 @@ #undef TLI_DEFINE_VECFUNC #undef TLI_DEFINE_ACCELERATE_VECFUNCS +#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS #undef TLI_DEFINE_SVML_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES Index: llvm/lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetLibraryInfo.cpp +++ llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -24,6 +24,8 @@ "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), + clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86", + "GLIBC Vector Math library"), clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", "IBM MASS vector library"), clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", @@ -1559,6 +1561,14 @@ addVectorizableFunctions(VecFuncs); break; } + case LIBMVEC_X86: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } case MASSV: { const VecDesc VecFuncs[] = { #define TLI_DEFINE_MASSV_VECFUNCS Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll @@ -0,0 +1,176 @@ +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @exp_f32(float* nocapture %varray) { +; CHECK-LABEL: @exp_f32 +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4v___expf_finite +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__expf_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: ; preds = %for.body + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @exp_f64(double* nocapture %varray) { +; CHECK-LABEL: @exp_f64 +; CHECK-LABEL: vector.body +; CHECK: <4 x double> @_ZGVdN4v___exp_finite +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__exp_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11 + +for.end: ; preds = %for.body + ret void +} + +!11 = distinct !{!11, !12, !13} +!12 = !{!"llvm.loop.vectorize.width", i32 4} +!13 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @log_f32(float* nocapture %varray) { +; CHECK-LABEL: @log_f32 +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4v___logf_finite +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__logf_finite(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: ; preds = %for.body + ret void +} + +!21 = distinct !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @log_f64(double* nocapture %varray) { +; CHECK-LABEL: @log_f64 +; CHECK-LABEL: vector.body +; CHECK: <4 x double> @_ZGVdN4v___log_finite +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call fast double @__log_finite(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %call, double* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: ; preds = %for.body + ret void +} + +!31 = distinct !{!31, !32, !33} +!32 = !{!"llvm.loop.vectorize.width", i32 4} +!33 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32 +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4vv___powf_finite +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @__powf_finite(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: ; preds = %for.body + ret void +} + +!41 = distinct !{!41, !42, !43} +!42 = !{!"llvm.loop.vectorize.width", i32 4} +!43 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @pow_f64(double* nocapture %varray, double* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f64 +; CHECK-LABEL: vector.body +; CHECK: <4 x double> @_ZGVdN4vv___pow_finite +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to double + %arrayidx = getelementptr inbounds double, double* %exp, i64 %indvars.iv + %tmp1 = load double, double* %arrayidx, align 4 + %tmp2 = tail call fast double @__pow_finite(double %conv, double %tmp1) + %arrayidx2 = getelementptr inbounds double, double* %varray, i64 %indvars.iv + store double %tmp2, double* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: ; preds = %for.body + ret void +} + +!51 = distinct !{!51, !52, !53} +!52 = !{!"llvm.loop.vectorize.width", i32 4} +!53 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @__expf_finite(float) #0 +declare double @__exp_finite(double) #0 +declare float @__logf_finite(float) #0 +declare double @__log_finite(double) #0 +declare float @__powf_finite(float, float) #0 +declare double @__pow_finite(double, double) #0 Index: llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll @@ -0,0 +1,373 @@ +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -loop-vectorize -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @sin_f64(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 4} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @sin_f32(float* nocapture %varray) { +; CHECK-LABEL: @sin_f32( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + +for.end: + ret void +} + +!21 = distinct !{!21, !22, !23} +!22 = !{!"llvm.loop.vectorize.width", i32 4} +!23 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @sin_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @sin_f64_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.sin.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + +for.end: + ret void +} + +!31 = distinct !{!31, !32, !33} +!32 = !{!"llvm.loop.vectorize.width", i32 4} +!33 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @sin_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @sin_f32_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_sinf(<4 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.sin.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + +for.end: + ret void +} + +!41 = distinct !{!41, !42, !43} +!42 = !{!"llvm.loop.vectorize.width", i32 4} +!43 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f64(double* nocapture %varray) { +; CHECK-LABEL: @cos_f64( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + +for.end: + ret void +} + +!51 = distinct !{!51, !52, !53} +!52 = !{!"llvm.loop.vectorize.width", i32 4} +!53 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f32(float* nocapture %varray) { +; CHECK-LABEL: @cos_f32( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @cosf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61 + +for.end: + ret void +} + +!61 = distinct !{!61, !62, !63} +!62 = !{!"llvm.loop.vectorize.width", i32 4} +!63 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f64_intrinsic(double* nocapture %varray) { +; CHECK-LABEL: @cos_f64_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.cos.f64(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + +for.end: + ret void +} + +!71 = distinct !{!71, !72, !73} +!72 = !{!"llvm.loop.vectorize.width", i32 4} +!73 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @cos_f32_intrinsic(float* nocapture %varray) { +; CHECK-LABEL: @cos_f32_intrinsic( +; CHECK-LABEL: vector.body +; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVbN4v_cosf(<4 x float> [[TMP4:%.*]]) +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.cos.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !81 + +for.end: + ret void +} + +!81 = distinct !{!81, !82, !83} +!82 = !{!"llvm.loop.vectorize.width", i32 4} +!83 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @exp_f32(float* nocapture %varray) { +; CHECK-LABEL: @exp_f32 +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4v_expf +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @expf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91 + +for.end: ; preds = %for.body + ret void +} + +!91 = distinct !{!91, !92, !93} +!92 = !{!"llvm.loop.vectorize.width", i32 4} +!93 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @exp_f32_intrin(float* nocapture %varray) { +; CHECK-LABEL: @exp_f32_intrin +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4v_expf +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @llvm.exp.f32(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101 + +for.end: ; preds = %for.body + ret void +} + +!101 = distinct !{!101, !102, !103} +!102 = !{!"llvm.loop.vectorize.width", i32 4} +!103 = !{!"llvm.loop.vectorize.enable", i1 true} + + +define void @log_f32(float* nocapture %varray) { +; CHECK-LABEL: @log_f32 +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4v_logf +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @logf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %call, float* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !111 + +for.end: ; preds = %for.body + ret void +} + +!111 = distinct !{!111, !112, !113} +!112 = !{!"llvm.loop.vectorize.width", i32 4} +!113 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @pow_f32(float* nocapture %varray, float* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32 +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4vv_powf +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @powf(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !121 + +for.end: ; preds = %for.body + ret void +} + +!121 = distinct !{!121, !122, !123} +!122 = !{!"llvm.loop.vectorize.width", i32 4} +!123 = !{!"llvm.loop.vectorize.enable", i1 true} + +define void @pow_f32_intrin(float* nocapture %varray, float* nocapture readonly %exp) { +; CHECK-LABEL: @pow_f32_intrin +; CHECK-LABEL: vector.body +; CHECK: <4 x float> @_ZGVbN4vv_powf +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %arrayidx = getelementptr inbounds float, float* %exp, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx, align 4 + %tmp2 = tail call fast float @llvm.pow.f32(float %conv, float %tmp1) + %arrayidx2 = getelementptr inbounds float, float* %varray, i64 %indvars.iv + store float %tmp2, float* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !131 + +for.end: ; preds = %for.body + ret void +} + +!131 = distinct !{!131, !132, !133} +!132 = !{!"llvm.loop.vectorize.width", i32 4} +!133 = !{!"llvm.loop.vectorize.enable", i1 true} + +attributes #0 = { nounwind readnone } + +declare double @sin(double) #0 +declare float @sinf(float) #0 +declare double @llvm.sin.f64(double) #0 +declare float @llvm.sin.f32(float) #0 +declare double @cos(double) #0 +declare float @cosf(float) #0 +declare double @llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 +declare float @expf(float) #0 +declare float @powf(float, float) #0 +declare float @llvm.exp.f32(float) #0 +declare float @logf(float) #0 +declare float @llvm.pow.f32(float, float) #0 Index: llvm/test/Transforms/Util/add-TLI-mappings.ll =================================================================== --- llvm/test/Transforms/Util/add-TLI-mappings.ll +++ llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -3,6 +3,8 @@ ; RUN: opt -vector-library=MASSV -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV ; RUN: opt -vector-library=Accelerate -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE +; RUN: opt -vector-library=LIBMVEC-X86 -inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 +; RUN: opt -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86 ; RUN: opt -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -21,6 +23,9 @@ ; MASSV-SAME: i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*) ; ACCELERATE-SAME: [1 x i8*] [ ; ACCELERATE-SAME: i8* bitcast (<4 x float> (<4 x float>)* @vlog10f to i8*) +; LIBMVEC-X86-SAME: [2 x i8*] [ +; LIBMVEC-X86-SAME: i8* bitcast (<2 x double> (<2 x double>)* @_ZGVbN2v_sin to i8*), +; LIBMVEC-X86-SAME: i8* bitcast (<4 x double> (<4 x double>)* @_ZGVdN4v_sin to i8*) ; COMMON-SAME: ], section "llvm.metadata" define double @sin_f64(double %in) { @@ -28,6 +33,7 @@ ; SVML: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; ACCELERATE: call double @sin(double %{{.*}}) +; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]] ; No mapping of "sin" to a vector function for Accelerate. ; ACCELERATE-NOT: _ZGV_LLVM_{{.*}}_sin({{.*}}) %call = tail call double @sin(double %in) @@ -39,10 +45,12 @@ define float @call_llvm.log10.f32(float %in) { ; COMMON-LABEL: @call_llvm.log10.f32( ; SVML: call float @llvm.log10.f32(float %{{.*}}) +; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}}) ; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; No mapping of "llvm.log10.f32" to a vector function for SVML. ; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) +; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}}) %call = tail call float @llvm.log10.f32(float %in) ret float %call } @@ -62,3 +70,7 @@ ; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"= ; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" } + +; LIBMVEC-X86: attributes #[[SIN]] = { "vector-function-abi-variant"= +; LIBMVEC-X86-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin), +; LIBMVEC-X86-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }