Index: include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- include/llvm/Analysis/TargetLibraryInfo.h +++ include/llvm/Analysis/TargetLibraryInfo.h @@ -46,6 +46,7 @@ class TargetLibraryInfoImpl { friend class TargetLibraryInfo; + llvm::Triple TT; unsigned char AvailableArray[(NumLibFuncs+3)/4]; llvm::DenseMap CustomNames; static StringRef const StandardNames[NumLibFuncs]; @@ -86,7 +87,9 @@ enum VectorLibrary { NoLibrary, // Don't use any vector library. Accelerate, // Use Accelerate framework. - SVML // Intel short vector math library. + SVML, // Intel short vector math library. + SLEEF // SLEEF - SIMD Library for Evaluating Elementary Functions + // (Experimental). }; TargetLibraryInfoImpl(); Index: include/llvm/Analysis/TargetLibraryInfo.def =================================================================== --- include/llvm/Analysis/TargetLibraryInfo.def +++ include/llvm/Analysis/TargetLibraryInfo.def @@ -296,12 +296,30 @@ /// long double __expl_finite(long double x); TLI_DEFINE_ENUM_INTERNAL(expl_finite) TLI_DEFINE_STRING_INTERNAL("__expl_finite") +/// double __gamma_r_finite(double x, int* s); +TLI_DEFINE_ENUM_INTERNAL(tgamma_finite) +TLI_DEFINE_STRING_INTERNAL("__gamma_r_finite") +/// float __gammaf_r_finite(float x, int* s); +TLI_DEFINE_ENUM_INTERNAL(tgammaf_finite) +TLI_DEFINE_STRING_INTERNAL("__gammaf_r_finite") +/// long double __gammal_r_finite(long double x, int* s); +TLI_DEFINE_ENUM_INTERNAL(tgammal_finite) +TLI_DEFINE_STRING_INTERNAL("__gammal_r_finite") /// int __isoc99_scanf (const char *format, ...) TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_scanf) TLI_DEFINE_STRING_INTERNAL("__isoc99_scanf") /// int __isoc99_sscanf(const char *s, const char *format, ...) 
TLI_DEFINE_ENUM_INTERNAL(dunder_isoc99_sscanf) TLI_DEFINE_STRING_INTERNAL("__isoc99_sscanf") +/// double __lgamma_r_finite(double x, int* s); +TLI_DEFINE_ENUM_INTERNAL(lgamma_finite) +TLI_DEFINE_STRING_INTERNAL("__lgamma_r_finite") +/// float __lgammaf_r_finite(float x, int* s); +TLI_DEFINE_ENUM_INTERNAL(lgammaf_finite) +TLI_DEFINE_STRING_INTERNAL("__lgammaf_r_finite") +/// long double __lgammal_r_finite(long double x, int* s); +TLI_DEFINE_ENUM_INTERNAL(lgammal_finite) +TLI_DEFINE_STRING_INTERNAL("__lgammal_r_finite") /// double __log10_finite(double x); TLI_DEFINE_ENUM_INTERNAL(log10_finite) TLI_DEFINE_STRING_INTERNAL("__log10_finite") @@ -876,6 +894,15 @@ /// long double ldexpl(long double x, int n); TLI_DEFINE_ENUM_INTERNAL(ldexpl) TLI_DEFINE_STRING_INTERNAL("ldexpl") +/// double lgamma(double x); +TLI_DEFINE_ENUM_INTERNAL(lgamma) +TLI_DEFINE_STRING_INTERNAL("lgamma") +/// float lgammaf(float x); +TLI_DEFINE_ENUM_INTERNAL(lgammaf) +TLI_DEFINE_STRING_INTERNAL("lgammaf") +/// long double lgammal(long double x); +TLI_DEFINE_ENUM_INTERNAL(lgammal) +TLI_DEFINE_STRING_INTERNAL("lgammal") /// long long int llabs(long long int j); TLI_DEFINE_ENUM_INTERNAL(llabs) TLI_DEFINE_STRING_INTERNAL("llabs") @@ -1275,6 +1302,15 @@ /// long double tanl(long double x); TLI_DEFINE_ENUM_INTERNAL(tanl) TLI_DEFINE_STRING_INTERNAL("tanl") +/// double tgamma(double x); +TLI_DEFINE_ENUM_INTERNAL(tgamma) +TLI_DEFINE_STRING_INTERNAL("tgamma") +/// float tgammaf(float x); +TLI_DEFINE_ENUM_INTERNAL(tgammaf) +TLI_DEFINE_STRING_INTERNAL("tgammaf") +/// long double tgammal(long double x); +TLI_DEFINE_ENUM_INTERNAL(tgammal) +TLI_DEFINE_STRING_INTERNAL("tgammal") /// clock_t times(struct tms *buffer); TLI_DEFINE_ENUM_INTERNAL(times) TLI_DEFINE_STRING_INTERNAL("times") Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -878,6 +878,13 @@ /// FMIN/FMAX nodes can have 
flags, for NaN/NoNaN variants. VECREDUCE_FMAX, VECREDUCE_FMIN, + STRICT_FACOS, STRICT_FASIN, STRICT_FATAN, STRICT_FATAN2, + STRICT_FTAN, STRICT_FCOSH, STRICT_FSINH, STRICT_FTANH, + STRICT_FASINH, STRICT_FACOSH, STRICT_FATANH, STRICT_FLGAMMA, + STRICT_FTGAMMA, STRICT_FEXP10, + FACOS, FASIN, FATAN, FATAN2, FTAN, FCOSH, FSINH, FTANH, + FASINH, FACOSH, FATANH, FLGAMMA, FTGAMMA, FEXP10, + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -513,13 +513,31 @@ def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>; def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_acos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_asin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atan2 : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_tan : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_pow : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; + def int_cosh : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_sinh : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_tanh : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + + def int_asinh : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_acosh : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_atanh : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + + def int_lgamma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_tgamma : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; 
def int_log10: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_exp10 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_copysign : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; Index: include/llvm/IR/RuntimeLibcalls.def =================================================================== --- include/llvm/IR/RuntimeLibcalls.def +++ include/llvm/IR/RuntimeLibcalls.def @@ -255,6 +255,127 @@ HANDLE_LIBCALL(FMAX_F128, "fmaxl") HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") +HANDLE_LIBCALL(ACOS_F32, "acosf") +HANDLE_LIBCALL(ACOS_F64, "acos") +HANDLE_LIBCALL(ACOS_F80, "acosl") +HANDLE_LIBCALL(ACOS_F128, "acosl") +HANDLE_LIBCALL(ACOS_PPCF128, "acosl") +HANDLE_LIBCALL(ACOS_FINITE_F32, "__acosf_finite") +HANDLE_LIBCALL(ACOS_FINITE_F64, "__acos_finite") +HANDLE_LIBCALL(ACOS_FINITE_F80, "__acosl_finite") +HANDLE_LIBCALL(ACOS_FINITE_F128, "__acosl_finite") +HANDLE_LIBCALL(ACOS_FINITE_PPCF128, "__acosl_finite") +HANDLE_LIBCALL(ACOSH_F32, "acoshf") +HANDLE_LIBCALL(ACOSH_F64, "acosh") +HANDLE_LIBCALL(ACOSH_F80, "acoshl") +HANDLE_LIBCALL(ACOSH_F128, "acoshl") +HANDLE_LIBCALL(ACOSH_PPCF128, "acoshl") +HANDLE_LIBCALL(ACOSH_FINITE_F32, "__acoshf_finite") +HANDLE_LIBCALL(ACOSH_FINITE_F64, "__acosh_finite") +HANDLE_LIBCALL(ACOSH_FINITE_F80, "__acoshl_finite") +HANDLE_LIBCALL(ACOSH_FINITE_F128, "__acoshl_finite") +HANDLE_LIBCALL(ACOSH_FINITE_PPCF128, "__acoshl_finite") +HANDLE_LIBCALL(ASIN_F32, "asinf") +HANDLE_LIBCALL(ASIN_F64, "asin") +HANDLE_LIBCALL(ASIN_F80, "asinl") +HANDLE_LIBCALL(ASIN_F128, "asinl") +HANDLE_LIBCALL(ASIN_PPCF128, "asinl") +HANDLE_LIBCALL(ASIN_FINITE_F32, "__asinf_finite") +HANDLE_LIBCALL(ASIN_FINITE_F64, "__asin_finite") +HANDLE_LIBCALL(ASIN_FINITE_F80, "__asinl_finite") 
+HANDLE_LIBCALL(ASIN_FINITE_F128, "__asinl_finite") +HANDLE_LIBCALL(ASIN_FINITE_PPCF128, "__asinl_finite") +HANDLE_LIBCALL(ASINH_F32, "asinhf") +HANDLE_LIBCALL(ASINH_F64, "asinh") +HANDLE_LIBCALL(ASINH_F80, "asinhl") +HANDLE_LIBCALL(ASINH_F128, "asinhl") +HANDLE_LIBCALL(ASINH_PPCF128, "asinhl") +HANDLE_LIBCALL(ATAN_F32, "atanf") +HANDLE_LIBCALL(ATAN_F64, "atan") +HANDLE_LIBCALL(ATAN_F80, "atanl") +HANDLE_LIBCALL(ATAN_F128, "atanl") +HANDLE_LIBCALL(ATAN_PPCF128, "atanl") +HANDLE_LIBCALL(ATAN2_F32, "atan2f") +HANDLE_LIBCALL(ATAN2_F64, "atan2") +HANDLE_LIBCALL(ATAN2_F80, "atan2l") +HANDLE_LIBCALL(ATAN2_F128, "atan2l") +HANDLE_LIBCALL(ATAN2_PPCF128, "atan2l") +HANDLE_LIBCALL(ATAN2_FINITE_F32, "__atan2f_finite") +HANDLE_LIBCALL(ATAN2_FINITE_F64, "__atan2_finite") +HANDLE_LIBCALL(ATAN2_FINITE_F80, "__atan2l_finite") +HANDLE_LIBCALL(ATAN2_FINITE_F128, "__atan2l_finite") +HANDLE_LIBCALL(ATAN2_FINITE_PPCF128, "__atan2l_finite") +HANDLE_LIBCALL(ATANH_F32, "atanhf") +HANDLE_LIBCALL(ATANH_F64, "atanh") +HANDLE_LIBCALL(ATANH_F80, "atanhl") +HANDLE_LIBCALL(ATANH_F128, "atanhl") +HANDLE_LIBCALL(ATANH_PPCF128, "atanhl") +HANDLE_LIBCALL(ATANH_FINITE_F32, "__atanhf_finite") +HANDLE_LIBCALL(ATANH_FINITE_F64, "__atanh_finite") +HANDLE_LIBCALL(ATANH_FINITE_F80, "__atanhl_finite") +HANDLE_LIBCALL(ATANH_FINITE_F128, "__atanhl_finite") +HANDLE_LIBCALL(ATANH_FINITE_PPCF128, "__atanhl_finite") +HANDLE_LIBCALL(COSH_F32, "coshf") +HANDLE_LIBCALL(COSH_F64, "cosh") +HANDLE_LIBCALL(COSH_F80, "coshl") +HANDLE_LIBCALL(COSH_F128, "coshl") +HANDLE_LIBCALL(COSH_PPCF128, "coshl") +HANDLE_LIBCALL(COSH_FINITE_F32, "__coshf_finite") +HANDLE_LIBCALL(COSH_FINITE_F64, "__cosh_finite") +HANDLE_LIBCALL(COSH_FINITE_F80, "__coshl_finite") +HANDLE_LIBCALL(COSH_FINITE_F128, "__coshl_finite") +HANDLE_LIBCALL(COSH_FINITE_PPCF128, "__coshl_finite") +HANDLE_LIBCALL(EXP10_F32, "exp10f") +HANDLE_LIBCALL(EXP10_F64, "exp10") +HANDLE_LIBCALL(EXP10_F80, "exp10l") +HANDLE_LIBCALL(EXP10_F128, "exp10l") 
+HANDLE_LIBCALL(EXP10_PPCF128, "exp10l") +HANDLE_LIBCALL(EXP10_FINITE_F32, "__exp10f_finite") +HANDLE_LIBCALL(EXP10_FINITE_F64, "__exp10_finite") +HANDLE_LIBCALL(EXP10_FINITE_F80, "__exp10l_finite") +HANDLE_LIBCALL(EXP10_FINITE_F128, "__exp10l_finite") +HANDLE_LIBCALL(EXP10_FINITE_PPCF128, "__exp10l_finite") +HANDLE_LIBCALL(LGAMMA_F32, "lgammaf") +HANDLE_LIBCALL(LGAMMA_F64, "lgamma") +HANDLE_LIBCALL(LGAMMA_F80, "lgammal") +HANDLE_LIBCALL(LGAMMA_F128, "lgammal") +HANDLE_LIBCALL(LGAMMA_PPCF128, "lgammal") +HANDLE_LIBCALL(LGAMMA_FINITE_F32, "__lgammaf_r_finite") +HANDLE_LIBCALL(LGAMMA_FINITE_F64, "__lgamma_r_finite") +HANDLE_LIBCALL(LGAMMA_FINITE_F80, "__lgammal_r_finite") +HANDLE_LIBCALL(LGAMMA_FINITE_F128, "__lgammal_r_finite") +HANDLE_LIBCALL(LGAMMA_FINITE_PPCF128, "__lgammal_r_finite") +HANDLE_LIBCALL(SINH_F32, "sinhf") +HANDLE_LIBCALL(SINH_F64, "sinh") +HANDLE_LIBCALL(SINH_F80, "sinhl") +HANDLE_LIBCALL(SINH_F128, "sinhl") +HANDLE_LIBCALL(SINH_PPCF128, "sinhl") +HANDLE_LIBCALL(SINH_FINITE_F32, "__sinhf_finite") +HANDLE_LIBCALL(SINH_FINITE_F64, "__sinh_finite") +HANDLE_LIBCALL(SINH_FINITE_F80, "__sinhl_finite") +HANDLE_LIBCALL(SINH_FINITE_F128, "__sinhl_finite") +HANDLE_LIBCALL(SINH_FINITE_PPCF128, "__sinhl_finite") +HANDLE_LIBCALL(TAN_F32, "tanf") +HANDLE_LIBCALL(TAN_F64, "tan") +HANDLE_LIBCALL(TAN_F80, "tanl") +HANDLE_LIBCALL(TAN_F128, "tanl") +HANDLE_LIBCALL(TAN_PPCF128, "tanl") +HANDLE_LIBCALL(TANH_F32, "tanhf") +HANDLE_LIBCALL(TANH_F64, "tanh") +HANDLE_LIBCALL(TANH_F80, "tanhl") +HANDLE_LIBCALL(TANH_F128, "tanhl") +HANDLE_LIBCALL(TANH_PPCF128, "tanhl") +HANDLE_LIBCALL(TGAMMA_F32, "tgammaf") +HANDLE_LIBCALL(TGAMMA_F64, "tgamma") +HANDLE_LIBCALL(TGAMMA_F80, "tgammal") +HANDLE_LIBCALL(TGAMMA_F128, "tgammal") +HANDLE_LIBCALL(TGAMMA_PPCF128, "tgammal") +HANDLE_LIBCALL(TGAMMA_FINITE_F32, "__gammaf_r_finite") +HANDLE_LIBCALL(TGAMMA_FINITE_F64, "__gamma_r_finite") +HANDLE_LIBCALL(TGAMMA_FINITE_F80, "__gammal_r_finite") +HANDLE_LIBCALL(TGAMMA_FINITE_F128, 
"__gammal_r_finite") +HANDLE_LIBCALL(TGAMMA_FINITE_PPCF128, "__gammal_r_finite") + // Conversion HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") Index: lib/Analysis/TargetLibraryInfo.cpp =================================================================== --- lib/Analysis/TargetLibraryInfo.cpp +++ lib/Analysis/TargetLibraryInfo.cpp @@ -24,7 +24,10 @@ clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), clEnumValN(TargetLibraryInfoImpl::SVML, "SVML", - "Intel SVML library"))); + "Intel SVML library"), + clEnumValN(TargetLibraryInfoImpl::SLEEF, "SLEEF", + "SIMD Library for Evaluating Elementary Functions"))); + StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { #define TLI_DEFINE_STRING @@ -383,9 +386,14 @@ // Fall through to disable all of them. LLVM_FALLTHROUGH; default: - TLI.setUnavailable(LibFunc_exp10); - TLI.setUnavailable(LibFunc_exp10f); - TLI.setUnavailable(LibFunc_exp10l); + // Allow exp10, exp10f, exp10l on AArch64 as they do not appear broken + // on Linux (GLIBC). + if (T.getArch() != Triple::aarch64 && + T.getArch() != Triple::aarch64_be) { + TLI.setUnavailable(LibFunc_exp10); + TLI.setUnavailable(LibFunc_exp10f); + TLI.setUnavailable(LibFunc_exp10l); + } } // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and @@ -543,7 +551,8 @@ initialize(*this, Triple(), StandardNames); } -TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) { +TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) + : TT(T) { // Default to everything being available. 
memset(AvailableArray, -1, sizeof(AvailableArray)); @@ -551,16 +560,18 @@ } TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) - : CustomNames(TLI.CustomNames), ShouldExtI32Param(TLI.ShouldExtI32Param), - ShouldExtI32Return(TLI.ShouldExtI32Return), - ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) { + : TT(TLI.TT), CustomNames(TLI.CustomNames), + ShouldExtI32Param(TLI.ShouldExtI32Param), + ShouldExtI32Return(TLI.ShouldExtI32Return), + ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) { memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); VectorDescs = TLI.VectorDescs; ScalarDescs = TLI.ScalarDescs; } TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) - : CustomNames(std::move(TLI.CustomNames)), + : TT(std::move(TLI.TT)), + CustomNames(std::move(TLI.CustomNames)), ShouldExtI32Param(TLI.ShouldExtI32Param), ShouldExtI32Return(TLI.ShouldExtI32Return), ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) { @@ -571,6 +582,7 @@ } TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { + TT = TLI.TT; CustomNames = TLI.CustomNames; ShouldExtI32Param = TLI.ShouldExtI32Param; ShouldExtI32Return = TLI.ShouldExtI32Return; @@ -580,6 +592,7 @@ } TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) { + TT = std::move(TLI.TT); CustomNames = std::move(TLI.CustomNames); ShouldExtI32Param = TLI.ShouldExtI32Param; ShouldExtI32Return = TLI.ShouldExtI32Return; @@ -1234,6 +1247,9 @@ case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: + case LibFunc_lgamma: + case LibFunc_lgammaf: + case LibFunc_lgammal: case LibFunc_log10: case LibFunc_log10_finite: case LibFunc_log10f: @@ -1288,6 +1304,9 @@ case LibFunc_tanhf: case LibFunc_tanhl: case LibFunc_tanl: + case LibFunc_tgamma: + case LibFunc_tgammaf: + case LibFunc_tgammal: case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: @@ -1322,6 +1341,16 @@ FTy.getReturnType() == FTy.getParamType(0) && 
FTy.getReturnType() == FTy.getParamType(1)); + case LibFunc_lgamma_finite: + case LibFunc_lgammaf_finite: + case LibFunc_lgammal_finite: + case LibFunc_tgamma_finite: + case LibFunc_tgammaf_finite: + case LibFunc_tgammal_finite: + return (NumParams == 2 && FTy.getReturnType()->isFloatingPointTy() && + FTy.getReturnType() == FTy.getParamType(0) && + FTy.getParamType(1)->isPointerTy()); + case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: @@ -1598,6 +1627,183 @@ addVectorizableFunctions(VecFuncs); break; } + case SLEEF: { + if (TT.getArch() == llvm::Triple::aarch64 || + TT.getArch() == llvm::Triple::aarch64_be) { + const VecDesc AArch64TwoAndFourLaneVecFuncs[] = { + { "acos", "_ZGVnN2v_acos", 2 }, + { "acos", "_ZGVnN4v_acosf", 4 }, + { "acosf", "_ZGVnN4v_acosf", 4 }, + { "llvm.acos.f64", "_ZGVnN2v_acos", 2 }, + { "llvm.acos.f32", "_ZGVnN4v_acosf", 4 }, + { "llvm.acos.v2f64", "_ZGVnN2v_acos", 2 }, + { "llvm.acos.v4f32", "_ZGVnN4v_acosf", 4 }, + + { "asin", "_ZGVnN2v_asin", 2 }, + { "asin", "_ZGVnN4v_asinf", 4 }, + { "asinf", "_ZGVnN4v_asinf", 4 }, + { "llvm.asin.f64", "_ZGVnN2v_asin", 2 }, + { "llvm.asin.f32", "_ZGVnN4v_asinf", 4 }, + { "llvm.asin.v2f64", "_ZGVnN2v_asin", 2 }, + { "llvm.asin.v4f32", "_ZGVnN4v_asinf", 4 }, + + { "atan", "_ZGVnN2v_atan", 2 }, + { "atan", "_ZGVnN4v_atanf", 4 }, + { "atanf", "_ZGVnN4v_atanf", 4 }, + { "llvm.atan.f64", "_ZGVnN2v_atan", 2 }, + { "llvm.atan.f32", "_ZGVnN4v_atanf", 4 }, + { "llvm.atan.v2f64", "_ZGVnN2v_atan", 2 }, + { "llvm.atan.v4f32", "_ZGVnN4v_atanf", 4 }, + + { "atan2", "_ZGVnN2vv_atan2", 2 }, + { "atan2", "_ZGVnN4vv_atan2f", 4 }, + { "atan2f", "_ZGVnN4vv_atan2f", 4 }, + { "llvm.atan2.f64", "_ZGVnN2vv_atan2", 2 }, + { "llvm.atan2.f32", "_ZGVnN4vv_atan2f", 4 }, + { "llvm.atan2.v2f64", "_ZGVnN2vv_atan2", 2 }, + { "llvm.atan2.v4f32", "_ZGVnN4vv_atan2f", 4 }, + + { "atanh", "_ZGVnN2v_atanh", 2 }, + { "atanh", "_ZGVnN4v_atanhf", 4 }, + { "atanhf", "_ZGVnN4v_atanhf", 4 }, + { "llvm.atanh.f64", "_ZGVnN2v_atanh", 2 }, 
+ { "llvm.atanh.f32", "_ZGVnN4v_atanhf", 4 }, + { "llvm.atanh.v2f64", "_ZGVnN2v_atanh", 2 }, + { "llvm.atanh.v4f32", "_ZGVnN4v_atanhf", 4 }, + + { "cos", "_ZGVnN2v_cos", 2 }, + { "cos", "_ZGVnN4v_cosf", 4 }, + { "cosf", "_ZGVnN4v_cosf", 4 }, + { "llvm.cos.f64", "_ZGVnN2v_cos", 2 }, + { "llvm.cos.f32", "_ZGVnN4v_cosf", 4 }, + { "llvm.cos.v2f64", "_ZGVnN2v_cos", 2 }, + { "llvm.cos.v4f32", "_ZGVnN4v_cosf", 4 }, + + { "cosh", "_ZGVnN2v_cosh", 2 }, + { "cosh", "_ZGVnN4v_coshf", 4 }, + { "coshf", "_ZGVnN4v_coshf", 4 }, + { "llvm.cosh.f64", "_ZGVnN2v_cosh", 2 }, + { "llvm.cosh.f32", "_ZGVnN4v_coshf", 4 }, + { "llvm.cosh.v2f64", "_ZGVnN2v_cosh", 2 }, + { "llvm.cosh.v4f32", "_ZGVnN4v_coshf", 4 }, + + { "exp", "_ZGVnN2v_exp", 2 }, + { "exp", "_ZGVnN4v_expf", 4 }, + { "expf", "_ZGVnN4v_expf", 4 }, + { "llvm.exp.f64", "_ZGVnN2v_exp", 2 }, + { "llvm.exp.f32", "_ZGVnN4v_expf", 4 }, + { "llvm.exp.v2f64", "_ZGVnN2v_exp", 2 }, + { "llvm.exp.v4f32", "_ZGVnN4v_expf", 4 }, + + { "exp2", "_ZGVnN2v_exp2", 2 }, + { "exp2", "_ZGVnN4v_exp2f", 4 }, + { "exp2f", "_ZGVnN4v_exp2f", 4 }, + { "llvm.exp2.f64", "_ZGVnN2v_exp2", 2 }, + { "llvm.exp2.f32", "_ZGVnN4v_exp2f", 4 }, + { "llvm.exp2.v2f64", "_ZGVnN2v_exp2", 2 }, + { "llvm.exp2.v4f32", "_ZGVnN4v_exp2f", 4 }, + + { "exp10", "_ZGVnN2v_exp10", 2 }, + { "exp10", "_ZGVnN4v_exp10f", 4 }, + { "exp10f", "_ZGVnN4v_exp10f", 4 }, + { "llvm.exp10.f64", "_ZGVnN2v_exp10", 2 }, + { "llvm.exp10.f32", "_ZGVnN4v_exp10f", 4 }, + { "llvm.exp10.v2f64", "_ZGVnN2v_exp10", 2 }, + { "llvm.exp10.v4f32", "_ZGVnN4v_exp10f", 4 }, + + { "lgamma", "_ZGVnN2v_lgamma", 2 }, + { "lgamma", "_ZGVnN4v_lgammaf", 4 }, + { "lgammaf", "_ZGVnN4v_lgammaf", 4 }, + { "llvm.lgamma.f64", "_ZGVnN2v_lgamma", 2 }, + { "llvm.lgamma.f32", "_ZGVnN4v_lgammaf", 4 }, + { "llvm.lgamma.v2f64", "_ZGVnN2v_lgamma", 2 }, + { "llvm.lgamma.v4f32", "_ZGVnN4v_lgammaf", 4 }, + + { "log", "_ZGVnN2v_log", 2 }, + { "log", "_ZGVnN4v_logf", 4 }, + { "logf", "_ZGVnN4v_logf", 4 }, + { "llvm.log.f64", 
"_ZGVnN2v_log", 2 }, + { "llvm.log.f32", "_ZGVnN4v_logf", 4 }, + { "llvm.log.v2f64", "_ZGVnN2v_log", 2 }, + { "llvm.log.v4f32", "_ZGVnN4v_logf", 4 }, + + { "log2", "_ZGVnN2v_log2", 2 }, + { "log2", "_ZGVnN4v_log2f", 4 }, + { "log2f", "_ZGVnN4v_log2f", 4 }, + { "llvm.log2.f64", "_ZGVnN2v_log2", 2 }, + { "llvm.log2.f32", "_ZGVnN4v_log2f", 4 }, + { "llvm.log2.v2f64", "_ZGVnN2v_log2", 2 }, + { "llvm.log2.v4f32", "_ZGVnN4v_log2f", 4 }, + + { "log10", "_ZGVnN2v_log10", 2 }, + { "log10", "_ZGVnN4v_log10f", 4 }, + { "log10f", "_ZGVnN4v_log10f", 4 }, + { "llvm.log10.f64", "_ZGVnN2v_log10", 2 }, + { "llvm.log10.f32", "_ZGVnN4v_log10f", 4 }, + { "llvm.log10.v2f64", "_ZGVnN2v_log10", 2 }, + { "llvm.log10.v4f32", "_ZGVnN4v_log10f", 4 }, + + { "pow", "_ZGVnN2vv_pow", 2 }, + { "pow", "_ZGVnN4vv_powf", 4 }, + { "powf", "_ZGVnN4vv_powf", 4 }, + { "llvm.pow.f64", "_ZGVnN2vv_pow", 2 }, + { "llvm.pow.f32", "_ZGVnN4vv_powf", 4 }, + { "llvm.pow.v2f64", "_ZGVnN2vv_pow", 2 }, + { "llvm.pow.v4f32", "_ZGVnN4vv_powf", 4 }, + + { "sin", "_ZGVnN2v_sin", 2 }, + { "sin", "_ZGVnN4v_sinf", 4 }, + { "sinf", "_ZGVnN4v_sinf", 4 }, + { "llvm.sin.f64", "_ZGVnN2v_sin", 2 }, + { "llvm.sin.f32", "_ZGVnN4v_sinf", 4 }, + { "llvm.sin.v2f64", "_ZGVnN2v_sin", 2 }, + { "llvm.sin.v4f32", "_ZGVnN4v_sinf", 4 }, + + { "sinh", "_ZGVnN2v_sinh", 2 }, + { "sinh", "_ZGVnN4v_sinhf", 4 }, + { "sinhf", "_ZGVnN4v_sinhf", 4 }, + { "llvm.sinh.f64", "_ZGVnN2v_sinh", 2 }, + { "llvm.sinh.f32", "_ZGVnN4v_sinhf", 4 }, + { "llvm.sinh.v2f64", "_ZGVnN2v_sinh", 2 }, + { "llvm.sinh.v4f32", "_ZGVnN4v_sinhf", 4 }, + + { "sqrt", "_ZGVnN2v_sqrt", 2 }, + { "sqrt", "_ZGVnN4v_sqrtf", 4 }, + { "sqrtf", "_ZGVnN4v_sqrtf", 4 }, + { "llvm.sqrt.f64", "_ZGVnN2v_sqrt", 2 }, + { "llvm.sqrt.f32", "_ZGVnN4v_sqrtf", 4 }, + { "llvm.sqrt.v2f64", "_ZGVnN2v_sqrt", 2 }, + { "llvm.sqrt.v4f32", "_ZGVnN4v_sqrtf", 4 }, + + { "tan", "_ZGVnN2v_tan", 2 }, + { "tan", "_ZGVnN4v_tanf", 4 }, + { "tanf", "_ZGVnN4v_tanf", 4 }, + { "llvm.tan.f64", "_ZGVnN2v_tan", 2 }, + { 
"llvm.tan.f32", "_ZGVnN4v_tanf", 4 }, + { "llvm.tan.v2f64", "_ZGVnN2v_tan", 2 }, + { "llvm.tan.v4f32", "_ZGVnN4v_tanf", 4 }, + + { "tanh", "_ZGVnN2v_tanh", 2 }, + { "tanh", "_ZGVnN4v_tanhf", 4 }, + { "tanhf", "_ZGVnN4v_tanhf", 4 }, + { "llvm.tanh.f64", "_ZGVnN2v_tanh", 2 }, + { "llvm.tanh.f32", "_ZGVnN4v_tanhf", 4 }, + { "llvm.tanh.v2f64", "_ZGVnN2v_tanh", 2 }, + { "llvm.tanh.v4f32", "_ZGVnN4v_tanhf", 4 }, + + { "tgamma", "_ZGVnN2v_tgamma", 2 }, + { "tgamma", "_ZGVnN4v_tgammaf", 4 }, + { "tgammaf", "_ZGVnN4v_tgammaf", 4 }, + { "llvm.tgamma.f64", "_ZGVnN2v_tgamma", 2 }, + { "llvm.tgamma.f32", "_ZGVnN4v_tgammaf", 4 }, + { "llvm.tgamma.v2f64", "_ZGVnN2v_tgamma", 2 }, + { "llvm.tgamma.v4f32", "_ZGVnN4v_tgammaf", 4 }, + }; + + addVectorizableFunctions(AArch64TwoAndFourLaneVecFuncs); + } + break; + } case NoLibrary: break; } Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1105,6 +1105,20 @@ case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_FACOS: + case ISD::STRICT_FASIN: + case ISD::STRICT_FATAN: + case ISD::STRICT_FTAN: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FSINH: + case ISD::STRICT_FTANH: + case ISD::STRICT_FASINH: + case ISD::STRICT_FACOSH: + case ISD::STRICT_FATANH: + case ISD::STRICT_FLGAMMA: + case ISD::STRICT_FTGAMMA: + case ISD::STRICT_FEXP10: + case ISD::STRICT_FATAN2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: @@ -3931,6 +3945,156 @@ RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128)); break; + case ISD::FACOS: + case ISD::STRICT_FACOS: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_acos_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::ACOS_FINITE_F32, + RTLIB::ACOS_FINITE_F64, + RTLIB::ACOS_FINITE_F80, + RTLIB::ACOS_FINITE_F128, + RTLIB::ACOS_FINITE_PPCF128)); + else + 
Results.push_back(ExpandFPLibCall(Node, RTLIB::ACOS_F32, + RTLIB::ACOS_F64, + RTLIB::ACOS_F80, + RTLIB::ACOS_F128, + RTLIB::ACOS_PPCF128)); + break; + case ISD::FASIN: + case ISD::STRICT_FASIN: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_asin_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::ASIN_FINITE_F32, + RTLIB::ASIN_FINITE_F64, + RTLIB::ASIN_FINITE_F80, + RTLIB::ASIN_FINITE_F128, + RTLIB::ASIN_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::ASIN_F32, + RTLIB::ASIN_F64, + RTLIB::ASIN_F80, + RTLIB::ASIN_F128, + RTLIB::ASIN_PPCF128)); + break; + case ISD::FATAN2: + case ISD::STRICT_FATAN2: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_atan2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::ATAN2_FINITE_F32, + RTLIB::ATAN2_FINITE_F64, + RTLIB::ATAN2_FINITE_F80, + RTLIB::ATAN2_FINITE_F128, + RTLIB::ATAN2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::ATAN2_F32, + RTLIB::ATAN2_F64, + RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, + RTLIB::ATAN2_PPCF128)); + break; + case ISD::FCOSH: + case ISD::STRICT_FCOSH: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_cosh_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::COSH_FINITE_F32, + RTLIB::COSH_FINITE_F64, + RTLIB::COSH_FINITE_F80, + RTLIB::COSH_FINITE_F128, + RTLIB::COSH_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::COSH_F32, + RTLIB::COSH_F64, + RTLIB::COSH_F80, + RTLIB::COSH_F128, + RTLIB::COSH_PPCF128)); + break; + case ISD::FSINH: + case ISD::STRICT_FSINH: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_sinh_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::SINH_FINITE_F32, + RTLIB::SINH_FINITE_F64, + RTLIB::SINH_FINITE_F80, + RTLIB::SINH_FINITE_F128, + RTLIB::SINH_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::SINH_F32, + RTLIB::SINH_F64, + RTLIB::SINH_F80, + RTLIB::SINH_F128, + RTLIB::SINH_PPCF128)); + break; + case ISD::FACOSH: + case 
ISD::STRICT_FACOSH: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_acosh_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::ACOSH_FINITE_F32, + RTLIB::ACOSH_FINITE_F64, + RTLIB::ACOSH_FINITE_F80, + RTLIB::ACOSH_FINITE_F128, + RTLIB::ACOSH_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::ACOSH_F32, + RTLIB::ACOSH_F64, + RTLIB::ACOSH_F80, + RTLIB::ACOSH_F128, + RTLIB::ACOSH_PPCF128)); + break; + case ISD::FATANH: + case ISD::STRICT_FATANH: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_atanh_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::ATANH_FINITE_F32, + RTLIB::ATANH_FINITE_F64, + RTLIB::ATANH_FINITE_F80, + RTLIB::ATANH_FINITE_F128, + RTLIB::ATANH_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::ATANH_F32, + RTLIB::ATANH_F64, + RTLIB::ATANH_F80, + RTLIB::ATANH_F128, + RTLIB::ATANH_PPCF128)); + break; + case ISD::FEXP10: + case ISD::STRICT_FEXP10: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp10_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP10_FINITE_F32, + RTLIB::EXP10_FINITE_F64, + RTLIB::EXP10_FINITE_F80, + RTLIB::EXP10_FINITE_F128, + RTLIB::EXP10_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP10_F32, + RTLIB::EXP10_F64, + RTLIB::EXP10_F80, + RTLIB::EXP10_F128, + RTLIB::EXP10_PPCF128)); + break; + case ISD::FLGAMMA: + case ISD::STRICT_FLGAMMA: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_lgamma_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LGAMMA_FINITE_F32, + RTLIB::LGAMMA_FINITE_F64, + RTLIB::LGAMMA_FINITE_F80, + RTLIB::LGAMMA_FINITE_F128, + RTLIB::LGAMMA_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LGAMMA_F32, + RTLIB::LGAMMA_F64, + RTLIB::LGAMMA_F80, + RTLIB::LGAMMA_F128, + RTLIB::LGAMMA_PPCF128)); + break; + case ISD::FTGAMMA: + case ISD::STRICT_FTGAMMA: + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_tgamma_finite)) + 
Results.push_back(ExpandFPLibCall(Node, RTLIB::TGAMMA_FINITE_F32, + RTLIB::TGAMMA_FINITE_F64, + RTLIB::TGAMMA_FINITE_F80, + RTLIB::TGAMMA_FINITE_F128, + RTLIB::TGAMMA_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::TGAMMA_F32, + RTLIB::TGAMMA_F64, + RTLIB::TGAMMA_F80, + RTLIB::TGAMMA_F128, + RTLIB::TGAMMA_PPCF128)); + break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -4336,6 +4500,7 @@ case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FPOW: + case ISD::FATAN2: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, @@ -4384,6 +4549,19 @@ case ISD::FABS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: + case ISD::FTAN: + case ISD::FCOSH: + case ISD::FSINH: + case ISD::FTANH: + case ISD::FASINH: + case ISD::FACOSH: + case ISD::FATANH: + case ISD::FLGAMMA: + case ISD::FTGAMMA: + case ISD::FEXP10: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -99,6 +99,20 @@ case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FACOS: R = SoftenFloatRes_FACOS(N); break; + case ISD::FASIN: R = SoftenFloatRes_FASIN(N); break; + case ISD::FATAN: R = SoftenFloatRes_FATAN(N); break; + case ISD::FATAN2: R = SoftenFloatRes_FATAN2(N); break; + case ISD::FTAN: R = SoftenFloatRes_FTAN(N); break; + case ISD::FCOSH: R = SoftenFloatRes_FCOSH(N); break; + 
case ISD::FSINH: R = SoftenFloatRes_FSINH(N); break; + case ISD::FTANH: R = SoftenFloatRes_FTANH(N); break; + case ISD::FASINH: R = SoftenFloatRes_FASINH(N); break; + case ISD::FACOSH: R = SoftenFloatRes_FACOSH(N); break; + case ISD::FATANH: R = SoftenFloatRes_FATANH(N); break; + case ISD::FLGAMMA: R = SoftenFloatRes_FLGAMMA(N); break; + case ISD::FTGAMMA: R = SoftenFloatRes_FTGAMMA(N); break; + case ISD::FEXP10: R = SoftenFloatRes_FEXP10(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; @@ -594,6 +608,175 @@ NVT, Op, false, SDLoc(N)).first; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FACOS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ACOS_F32, + RTLIB::ACOS_F64, + RTLIB::ACOS_F80, + RTLIB::ACOS_F128, + RTLIB::ACOS_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FASIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ASIN_F32, + RTLIB::ASIN_F64, + RTLIB::ASIN_F80, + RTLIB::ASIN_F128, + RTLIB::ASIN_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ATAN_F32, + RTLIB::ATAN_F64, + RTLIB::ATAN_F80, + RTLIB::ATAN_F128, + RTLIB::ATAN_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTAN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op 
= GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TAN_F32, + RTLIB::TAN_F64, + RTLIB::TAN_F80, + RTLIB::TAN_F128, + RTLIB::TAN_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOSH(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COSH_F32, + RTLIB::COSH_F64, + RTLIB::COSH_F80, + RTLIB::COSH_F128, + RTLIB::COSH_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSINH(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SINH_F32, + RTLIB::SINH_F64, + RTLIB::SINH_F80, + RTLIB::SINH_F128, + RTLIB::SINH_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTANH(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TANH_F32, + RTLIB::TANH_F64, + RTLIB::TANH_F80, + RTLIB::TANH_F128, + RTLIB::TANH_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FASINH(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ASINH_F32, + RTLIB::ASINH_F64, + RTLIB::ASINH_F80, + RTLIB::ASINH_F128, + RTLIB::ASINH_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FACOSH(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = 
GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ACOSH_F32, + RTLIB::ACOSH_F64, + RTLIB::ACOSH_F80, + RTLIB::ACOSH_F128, + RTLIB::ACOSH_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FATANH(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ATANH_F32, + RTLIB::ATANH_F64, + RTLIB::ATANH_F80, + RTLIB::ATANH_F128, + RTLIB::ATANH_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLGAMMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LGAMMA_F32, + RTLIB::LGAMMA_F64, + RTLIB::LGAMMA_F80, + RTLIB::LGAMMA_F128, + RTLIB::LGAMMA_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTGAMMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TGAMMA_F32, + RTLIB::TGAMMA_F64, + RTLIB::TGAMMA_F80, + RTLIB::TGAMMA_F128, + RTLIB::TGAMMA_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP10(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP10_F32, + RTLIB::EXP10_F64, + RTLIB::EXP10_F80, + RTLIB::EXP10_F128, + RTLIB::EXP10_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FATAN2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + 
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ATAN2_F32, + RTLIB::ATAN2_F64, + RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, + RTLIB::ATAN2_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -1107,6 +1290,20 @@ case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + case ISD::FACOS: ExpandFloatRes_FACOS(N, Lo, Hi); break; + case ISD::FASIN: ExpandFloatRes_FASIN(N, Lo, Hi); break; + case ISD::FATAN: ExpandFloatRes_FATAN(N, Lo, Hi); break; + case ISD::FATAN2: ExpandFloatRes_FATAN2(N, Lo, Hi); break; + case ISD::FTAN: ExpandFloatRes_FTAN(N, Lo, Hi); break; + case ISD::FCOSH: ExpandFloatRes_FCOSH(N, Lo, Hi); break; + case ISD::FSINH: ExpandFloatRes_FSINH(N, Lo, Hi); break; + case ISD::FTANH: ExpandFloatRes_FTANH(N, Lo, Hi); break; + case ISD::FEXP10: ExpandFloatRes_FEXP10(N, Lo, Hi); break; + case ISD::FACOSH: ExpandFloatRes_FACOSH(N, Lo, Hi); break; + case ISD::FASINH: ExpandFloatRes_FASINH(N, Lo, Hi); break; + case ISD::FATANH: ExpandFloatRes_FATANH(N, Lo, Hi); break; + case ISD::FLGAMMA: ExpandFloatRes_FLGAMMA(N, Lo, Hi); break; + case ISD::FTGAMMA: ExpandFloatRes_FTGAMMA(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
@@ -1432,6 +1629,150 @@ GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FACOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ACOS_F32, RTLIB::ACOS_F64, + RTLIB::ACOS_F80, RTLIB::ACOS_F128, + RTLIB::ACOS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FASIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ASIN_F32, RTLIB::ASIN_F64, + RTLIB::ASIN_F80, RTLIB::ASIN_F128, + RTLIB::ASIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FATAN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ATAN_F32, RTLIB::ATAN_F64, + RTLIB::ATAN_F80, RTLIB::ATAN_F128, + RTLIB::ATAN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FATAN2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ATAN2_F32, + RTLIB::ATAN2_F64, + RTLIB::ATAN2_F80, + RTLIB::ATAN2_F128, + RTLIB::ATAN2_PPCF128), + N->getValueType(0), Ops, false, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTAN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TAN_F32, RTLIB::TAN_F64, + RTLIB::TAN_F80, RTLIB::TAN_F128, + RTLIB::TAN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOSH(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COSH_F32, RTLIB::COSH_F64, + RTLIB::COSH_F80, RTLIB::COSH_F128, + RTLIB::COSH_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSINH(SDNode *N, + 
SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SINH_F32, RTLIB::SINH_F64, + RTLIB::SINH_F80, RTLIB::SINH_F128, + RTLIB::SINH_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTANH(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TANH_F32, RTLIB::TANH_F64, + RTLIB::TANH_F80, RTLIB::TANH_F128, + RTLIB::TANH_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FASINH(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ASINH_F32, RTLIB::ASINH_F64, + RTLIB::ASINH_F80, RTLIB::ASINH_F128, + RTLIB::ASINH_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FACOSH(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ACOSH_F32, RTLIB::ACOSH_F64, + RTLIB::ACOSH_F80, RTLIB::ACOSH_F128, + RTLIB::ACOSH_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FATANH(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ATANH_F32, RTLIB::ATANH_F64, + RTLIB::ATANH_F80, RTLIB::ATANH_F128, + RTLIB::ATANH_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLGAMMA(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LGAMMA_F32, RTLIB::LGAMMA_F64, + RTLIB::LGAMMA_F80, RTLIB::LGAMMA_F128, + RTLIB::LGAMMA_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTGAMMA(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TGAMMA_F32, RTLIB::TGAMMA_F64, + RTLIB::TGAMMA_F80, RTLIB::TGAMMA_F128, + RTLIB::TGAMMA_PPCF128), + 
N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP10_F32, RTLIB::EXP10_F64, + RTLIB::EXP10_F80, RTLIB::EXP10_F128, + RTLIB::EXP10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { if (ISD::isNormalLoad(N)) { @@ -1927,6 +2268,19 @@ case ISD::FSQRT: case ISD::FTRUNC: case ISD::FCANONICALIZE: R = PromoteFloatRes_UnaryOp(N); break; + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: + case ISD::FTAN: + case ISD::FCOSH: + case ISD::FSINH: + case ISD::FTANH: + case ISD::FACOSH: + case ISD::FASINH: + case ISD::FATANH: + case ISD::FLGAMMA: + case ISD::FTGAMMA: + case ISD::FEXP10: R = PromoteFloatRes_UnaryOp(N); break; // Binary FP Operations case ISD::FADD: @@ -1940,6 +2294,8 @@ case ISD::FREM: case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + case ISD::FATAN2: R = PromoteFloatRes_BinOp(N); break; + case ISD::FMA: // FMA is same as FMAD case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -521,6 +521,20 @@ SDValue SoftenFloatRes_FRINT(SDNode *N); SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FACOS(SDNode *N); + SDValue SoftenFloatRes_FASIN(SDNode *N); + SDValue SoftenFloatRes_FATAN(SDNode *N); + SDValue SoftenFloatRes_FATAN2(SDNode *N); + SDValue SoftenFloatRes_FTAN(SDNode *N); + SDValue SoftenFloatRes_FCOSH(SDNode *N); + SDValue SoftenFloatRes_FSINH(SDNode *N); + SDValue SoftenFloatRes_FTANH(SDNode *N); + SDValue SoftenFloatRes_FASINH(SDNode *N); + SDValue SoftenFloatRes_FACOSH(SDNode *N); + SDValue SoftenFloatRes_FATANH(SDNode *N); + SDValue 
SoftenFloatRes_FLGAMMA(SDNode *N); + SDValue SoftenFloatRes_FTGAMMA(SDNode *N); + SDValue SoftenFloatRes_FEXP10(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); @@ -594,6 +608,20 @@ void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATAN2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTAN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOSH (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSINH (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTANH (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FASINH (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FACOSH (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FATANH (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLGAMMA (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTGAMMA (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP10 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -321,6 +321,20 @@ case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_FACOS: + case ISD::STRICT_FASIN: + case ISD::STRICT_FATAN: 
+ case ISD::STRICT_FTAN: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FSINH: + case ISD::STRICT_FTANH: + case ISD::STRICT_FASINH: + case ISD::STRICT_FACOSH: + case ISD::STRICT_FATANH: + case ISD::STRICT_FLGAMMA: + case ISD::STRICT_FTGAMMA: + case ISD::STRICT_FEXP10: + case ISD::STRICT_FATAN2: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: @@ -399,6 +413,20 @@ case ISD::FLOG10: case ISD::FEXP: case ISD::FEXP2: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: + case ISD::FTAN: + case ISD::FCOSH: + case ISD::FSINH: + case ISD::FTANH: + case ISD::FASINH: + case ISD::FACOSH: + case ISD::FATANH: + case ISD::FLGAMMA: + case ISD::FTGAMMA: + case ISD::FEXP10: + case ISD::FATAN2: case ISD::FCEIL: case ISD::FTRUNC: case ISD::FRINT: Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -85,6 +85,19 @@ case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: + case ISD::FTAN: + case ISD::FCOSH: + case ISD::FSINH: + case ISD::FTANH: + case ISD::FASINH: + case ISD::FACOSH: + case ISD::FATANH: + case ISD::FLGAMMA: + case ISD::FTGAMMA: + case ISD::FEXP10: case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: @@ -140,6 +153,7 @@ case ISD::SHL: case ISD::SRA: case ISD::SRL: + case ISD::FATAN2: R = ScalarizeVecRes_BinOp(N); break; case ISD::FMA: @@ -831,6 +845,19 @@ case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::FCANONICALIZE: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: + case ISD::FTAN: + case ISD::FCOSH: + case ISD::FSINH: + case ISD::FTANH: + case ISD::FASINH: + case ISD::FACOSH: + case ISD::FATANH: + case ISD::FLGAMMA: + case ISD::FTGAMMA: + case ISD::FEXP10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -873,6 +900,7 @@ case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: + case ISD::FATAN2: 
SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: @@ -894,6 +922,20 @@ case ISD::STRICT_FLOG: case ISD::STRICT_FLOG10: case ISD::STRICT_FLOG2: + case ISD::STRICT_FACOS: + case ISD::STRICT_FASIN: + case ISD::STRICT_FATAN: + case ISD::STRICT_FATAN2: + case ISD::STRICT_FTAN: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FSINH: + case ISD::STRICT_FTANH: + case ISD::STRICT_FASINH: + case ISD::STRICT_FACOSH: + case ISD::STRICT_FATANH: + case ISD::STRICT_FLGAMMA: + case ISD::STRICT_FTGAMMA: + case ISD::STRICT_FEXP10: case ISD::STRICT_FRINT: case ISD::STRICT_FNEARBYINT: case ISD::STRICT_FMAXNUM: @@ -2522,6 +2564,7 @@ case ISD::SADDSAT: case ISD::USUBSAT: case ISD::SSUBSAT: + case ISD::FATAN2: Res = WidenVecRes_Binary(N); break; @@ -2562,6 +2605,20 @@ case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FACOS: + case ISD::STRICT_FASIN: + case ISD::STRICT_FATAN: + case ISD::STRICT_FATAN2: + case ISD::STRICT_FTAN: + case ISD::STRICT_FCOSH: + case ISD::STRICT_FSINH: + case ISD::STRICT_FTANH: + case ISD::STRICT_FASINH: + case ISD::STRICT_FACOSH: + case ISD::STRICT_FATANH: + case ISD::STRICT_FLGAMMA: + case ISD::STRICT_FTGAMMA: + case ISD::STRICT_FEXP10: Res = WidenVecRes_StrictFP(N); break; @@ -2648,6 +2705,19 @@ case ISD::CTTZ: case ISD::FNEG: case ISD::FCANONICALIZE: + case ISD::FACOS: + case ISD::FASIN: + case ISD::FATAN: + case ISD::FTAN: + case ISD::FCOSH: + case ISD::FSINH: + case ISD::FTANH: + case ISD::FASINH: + case ISD::FACOSH: + case ISD::FATANH: + case ISD::FLGAMMA: + case ISD::FTGAMMA: + case ISD::FEXP10: Res = WidenVecRes_Unary(N); break; case ISD::FMA: Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7595,6 +7595,20 @@ case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break; case 
ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break; + case ISD::STRICT_FACOS: NewOpc = ISD::FACOS; IsUnary = true; break; + case ISD::STRICT_FASIN: NewOpc = ISD::FASIN; IsUnary = true; break; + case ISD::STRICT_FATAN: NewOpc = ISD::FATAN; IsUnary = true; break; + case ISD::STRICT_FTAN: NewOpc = ISD::FTAN; IsUnary = true; break; + case ISD::STRICT_FCOSH: NewOpc = ISD::FCOSH; IsUnary = true; break; + case ISD::STRICT_FSINH: NewOpc = ISD::FSINH; IsUnary = true; break; + case ISD::STRICT_FTANH: NewOpc = ISD::FTANH; IsUnary = true; break; + case ISD::STRICT_FASINH: NewOpc = ISD::FASINH; IsUnary = true; break; + case ISD::STRICT_FACOSH: NewOpc = ISD::FACOSH; IsUnary = true; break; + case ISD::STRICT_FATANH: NewOpc = ISD::FATANH; IsUnary = true; break; + case ISD::STRICT_FLGAMMA: NewOpc = ISD::FLGAMMA; IsUnary = true; break; + case ISD::STRICT_FTGAMMA: NewOpc = ISD::FTGAMMA; IsUnary = true; break; + case ISD::STRICT_FEXP10: NewOpc = ISD::FEXP10; IsUnary = true; break; + case ISD::STRICT_FATAN2: NewOpc = ISD::FATAN2; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5188,6 +5188,113 @@ return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +// expandNonVectorIntrinsics - return the canonical non-vector libm +// function name for the newly added (experimental) Intrinsics. 
+static const char* +expandNonVectorIntrinsics(const llvm::EVT &OpEVT, unsigned Opcode, + bool IsFastMath) { + llvm::MVT OpMVT; + + if (OpEVT.isVector()) + OpMVT = OpEVT.getVectorElementType().getSimpleVT(); + else + OpMVT = OpEVT.getSimpleVT(); + + assert(OpMVT.isFloatingPoint() && "Expected a floating-point type!"); + + switch(Opcode) { + default: + return nullptr; + break; + case ISD::FACOS: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "acosf" : + OpMVT == MVT::f64 ? "acos" : "acosl"; + return nullptr; + break; + case ISD::FASIN: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "asinf" : + OpMVT == MVT::f64 ? "asin" : "asinl"; + return nullptr; + break; + case ISD::FATAN: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "atanf" : + OpMVT == MVT::f64 ? "atan" : "atanl"; + return nullptr; + break; + case ISD::FATAN2: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "atan2f" : + OpMVT == MVT::f64 ? "atan2" : "atan2l"; + return nullptr; + break; + case ISD::FTAN: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "tanf" : + OpMVT == MVT::f64 ? "tan" : "tanl"; + return nullptr; + break; + case ISD::FCOSH: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "coshf" : + OpMVT == MVT::f64 ? "cosh" : "coshl"; + return nullptr; + break; + case ISD::FSINH: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "sinhf" : + OpMVT == MVT::f64 ? "sinh" : "sinhl"; + return nullptr; + break; + case ISD::FTANH: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "tanhf" : + OpMVT == MVT::f64 ? "tanh" : "tanhl"; + return nullptr; + break; + case ISD::FACOSH: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "acoshf" : + OpMVT == MVT::f64 ? "acosh" : "acoshl"; + return nullptr; + break; + case ISD::FASINH: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "asinhf" : + OpMVT == MVT::f64 ? "asinh" : "asinhl"; + return nullptr; + break; + case ISD::FATANH: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "atanhf" : + OpMVT == MVT::f64 ? 
"atanh" : "atanhl"; + return nullptr; + break; + case ISD::FLGAMMA: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "lgammaf" : + OpMVT == MVT::f64 ? "lgamma" : "lgammal"; + return nullptr; + break; + case ISD::FTGAMMA: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "tgammaf" : + OpMVT == MVT::f64 ? "tgamma" : "tgammal"; + return nullptr; + break; + case ISD::FEXP10: + if (!OpMVT.isVector()) + return OpMVT == MVT::f32 ? "exp10f" : + OpMVT == MVT::f64 ? "exp10" : "exp10l"; + return nullptr; + break; + } + + return nullptr; +} + // getUnderlyingArgReg - Find underlying register used for a truncated or // bitcasted argument. static unsigned getUnderlyingArgReg(const SDValue &N) { @@ -5836,6 +5943,46 @@ setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, TLI)); return nullptr; + case Intrinsic::acos: + case Intrinsic::asin: + case Intrinsic::atan: + case Intrinsic::atan2: + case Intrinsic::tan: + case Intrinsic::cosh: + case Intrinsic::sinh: + case Intrinsic::tanh: + case Intrinsic::acosh: + case Intrinsic::asinh: + case Intrinsic::atanh: + case Intrinsic::lgamma: + case Intrinsic::tgamma: + case Intrinsic::exp10: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::acos: Opcode = ISD::FACOS; break; + case Intrinsic::asin: Opcode = ISD::FASIN; break; + case Intrinsic::atan: Opcode = ISD::FATAN; break; + case Intrinsic::atan2: Opcode = ISD::FATAN2; break; + case Intrinsic::tan: Opcode = ISD::FTAN; break; + case Intrinsic::cosh: Opcode = ISD::FCOSH; break; + case Intrinsic::sinh: Opcode = ISD::FSINH; break; + case Intrinsic::tanh: Opcode = ISD::FTANH; break; + case Intrinsic::acosh: Opcode = ISD::FACOSH; break; + case Intrinsic::asinh: Opcode = ISD::FASINH; break; + case Intrinsic::atanh: Opcode = ISD::FATANH; break; + case Intrinsic::lgamma: Opcode = ISD::FLGAMMA; break; + case Intrinsic::tgamma: Opcode = ISD::FTGAMMA; break; + case Intrinsic::exp10: Opcode = ISD::FEXP10; break; + } + + llvm::EVT OpEVT = getValue(I.getArgOperand(0)).getValueType(); + bool IsFastMath = false; + if (const FPMathOperator *FPOp = dyn_cast(&I)) + IsFastMath = FPOp->getFastMathFlags().isFast(); + + return expandNonVectorIntrinsics(OpEVT, Opcode, IsFastMath); + } case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: @@ -7396,6 +7543,90 @@ if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; + case LibFunc_acos: + case LibFunc_acosf: + case LibFunc_acosl: + if (visitUnaryFloatCall(I, ISD::FACOS)) + return; + break; + case LibFunc_asin: + case LibFunc_asinf: + case LibFunc_asinl: + if (visitUnaryFloatCall(I, ISD::FASIN)) + return; + break; + case LibFunc_atan: + case LibFunc_atanf: + case LibFunc_atanl: + if (visitUnaryFloatCall(I, ISD::FATAN)) + return; + break; + case LibFunc_atan2: + case LibFunc_atan2f: + case LibFunc_atan2l: + if (visitBinaryFloatCall(I, ISD::FATAN2)) + return; + break; + case LibFunc_tan: + case LibFunc_tanf: + case LibFunc_tanl: + if (visitUnaryFloatCall(I, ISD::FTAN)) + return; + break; + case LibFunc_acosh: + case LibFunc_acoshf: + case LibFunc_acoshl: + if (visitUnaryFloatCall(I, ISD::FACOSH)) + return; + break; + case LibFunc_asinh: + case LibFunc_asinhf: + case LibFunc_asinhl: + if 
(visitUnaryFloatCall(I, ISD::FASINH)) + return; + break; + case LibFunc_atanh: + case LibFunc_atanhf: + case LibFunc_atanhl: + if (visitUnaryFloatCall(I, ISD::FATANH)) + return; + break; + case LibFunc_cosh: + case LibFunc_coshf: + case LibFunc_coshl: + if (visitUnaryFloatCall(I, ISD::FCOSH)) + return; + break; + case LibFunc_sinh: + case LibFunc_sinhf: + case LibFunc_sinhl: + if (visitUnaryFloatCall(I, ISD::FSINH)) + return; + break; + case LibFunc_tanh: + case LibFunc_tanhf: + case LibFunc_tanhl: + if (visitUnaryFloatCall(I, ISD::FTANH)) + return; + break; + case LibFunc_lgamma: + case LibFunc_lgammaf: + case LibFunc_lgammal: + if (visitUnaryFloatCall(I, ISD::FLGAMMA)) + return; + break; + case LibFunc_tgamma: + case LibFunc_tgammaf: + case LibFunc_tgammal: + if (visitUnaryFloatCall(I, ISD::FTGAMMA)) + return; + break; + case LibFunc_exp10: + case LibFunc_exp10f: + case LibFunc_exp10l: + if (visitUnaryFloatCall(I, ISD::FEXP10)) + return; + break; case LibFunc_memcmp: if (visitMemCmpCall(I)) return; Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -218,6 +218,33 @@ case ISD::FLOG10: return "flog10"; case ISD::STRICT_FLOG10: return "strict_flog10"; + case ISD::FACOS: return "facos"; + case ISD::STRICT_FACOS: return "strict_facos"; + case ISD::FASIN: return "fasin"; + case ISD::STRICT_FASIN: return "strict_fasin"; + case ISD::FATAN: return "fatan"; + case ISD::STRICT_FATAN: return "strict_fatan"; + case ISD::FTAN: return "ftan"; + case ISD::STRICT_FTAN: return "strict_ftan"; + case ISD::FCOSH: return "fcosh"; + case ISD::STRICT_FCOSH: return "strict_fcosh"; + case ISD::FSINH: return "fsinh"; + case ISD::STRICT_FSINH: return "strict_fsinh"; + case ISD::FTANH: return "ftanh"; + case ISD::STRICT_FTANH: return "strict_ftanh"; + case ISD::FASINH: return "fasinh"; + case ISD::STRICT_FASINH: 
return "strict_fasinh"; + case ISD::FACOSH: return "facosh"; + case ISD::STRICT_FACOSH: return "strict_facosh"; + case ISD::FATANH: return "fatanh"; + case ISD::STRICT_FATANH: return "strict_fatanh"; + case ISD::FLGAMMA: return "flgamma"; + case ISD::STRICT_FLGAMMA: return "strict_flgamma"; + case ISD::FTGAMMA: return "ftgamma"; + case ISD::STRICT_FTGAMMA: return "strict_ftgamma"; + case ISD::FEXP10: return "fexp10"; + case ISD::STRICT_FEXP10: return "strict_fexp10"; + // Binary operators case ISD::ADD: return "add"; case ISD::SUB: return "sub"; @@ -267,6 +294,8 @@ case ISD::FPOWI: return "fpowi"; case ISD::STRICT_FPOWI: return "strict_fpowi"; + case ISD::FATAN2: return "fatan2"; + case ISD::STRICT_FATAN2: return "strict_fatan2"; case ISD::SETCC: return "setcc"; case ISD::SETCCCARRY: return "setcccarry"; case ISD::SELECT: return "select"; Index: test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64-fast.ll =================================================================== --- test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64-fast.ll +++ test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64-fast.ll @@ -0,0 +1,1078 @@ +; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail. 
+; RUN: opt -vector-library=SLEEF -loop-unroll -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -vector-library=SLEEF -loop-unroll -loop-vectorize -fp-contract=fast -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare double @acos(double) #0 +declare float @acosf(float) #0 +declare double @llvm.acos.f64(double) #0 +declare float @llvm.acos.f32(float) #0 + +define void @acos_f64(double* nocapture %varray) { + ; CHECK-LABEL: @acos_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @acos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @acos_f32(float* nocapture %varray) { + ; CHECK-LABEL: @acos_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @acosf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @asin(double) #0 +declare float @asinf(float) #0 +declare double @llvm.asin.f64(double) #0 +declare float @llvm.asin.f32(float) #0 + +define void 
@asin_f64(double* nocapture %varray) { + ; CHECK-LABEL: @asin_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @asin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @asin_f32(float* nocapture %varray) { + ; CHECK-LABEL: @asin_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @asinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atan(double) #0 +declare float @atanf(float) #0 +declare double @llvm.atan.f64(double) #0 +declare float @llvm.atan.f32(float) #0 + +define void @atan_f64(double* nocapture %varray) { + ; CHECK-LABEL: @atan_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @atan(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, 
double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan_f32(float* nocapture %varray) { + ; CHECK-LABEL: @atan_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @atanf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atan2(double, double) #0 +declare float @atan2f(float, float) #0 +declare double @llvm.atan2.f64(double, double) #0 +declare float @llvm.atan2.f32(float, float) #0 + +define void @atan2_f64(double* nocapture %varray) { + ; CHECK-LABEL: @atan2_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @atan2(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan2_f32(float* nocapture %varray) { + ; CHECK-LABEL: @atan2_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + 
%tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @atan2f(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atanh(double) #0 +declare float @atanhf(float) #0 +declare double @llvm.atanh.f64(double) #0 +declare float @llvm.atanh.f32(float) #0 + +define void @atanh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @atanh_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @atanh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atanh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @atanh_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @atanhf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cos(double) #0 +declare float @cosf(float) #0 +declare double 
@llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 + +define void @cos_f64(double* nocapture %varray) { + ; CHECK-LABEL: @cos_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @cos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cos_f32(float* nocapture %varray) { + ; CHECK-LABEL: @cos_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @cosf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cosh(double) #0 +declare float @coshf(float) #0 +declare double @llvm.cosh.f64(double) #0 +declare float @llvm.cosh.f32(float) #0 + +define void @cosh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @cosh_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @cosh(double %conv) + %arrayidx = getelementptr 
inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cosh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @cosh_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @coshf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp(double) #0 +declare float @expf(float) #0 +declare double @llvm.exp.f64(double) #0 +declare float @llvm.exp.f32(float) #0 + +define void @exp_f64(double* nocapture %varray) { + ; CHECK-LABEL: @exp_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @exp(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp_f32(float* nocapture %varray) { + ; CHECK-LABEL: @exp_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 
0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @expf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp2(double) #0 +declare float @exp2f(float) #0 +declare double @llvm.exp2.f64(double) #0 +declare float @llvm.exp2.f32(float) #0 + +define void @exp2_f64(double* nocapture %varray) { + ; CHECK-LABEL: @exp2_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @exp2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp2_f32(float* nocapture %varray) { + ; CHECK-LABEL: @exp2_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @exp2f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp10(double) #0 +declare float @exp10f(float) #0 
+declare double @llvm.exp10.f64(double) #0 +declare float @llvm.exp10.f32(float) #0 + +define void @exp10_f64(double* nocapture %varray) { + ; CHECK-LABEL: @exp10_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @exp10(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp10_f32(float* nocapture %varray) { + ; CHECK-LABEL: @exp10_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @exp10f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @lgamma(double) #0 +declare float @lgammaf(float) #0 +; lgamma has no LLVM intrinsic form (there is no @llvm.lgamma.*); declaring one +; is rejected by the IR verifier, so only the libm declarations above are used. + +define void @lgamma_f64(double* nocapture %varray) { + ; CHECK-LABEL: @lgamma_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast 
double @lgamma(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @lgamma_f32(float* nocapture %varray) { + ; CHECK-LABEL: @lgamma_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @lgammaf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log10(double) #0 +declare float @log10f(float) #0 +declare double @llvm.log10.f64(double) #0 +declare float @llvm.log10.f32(float) #0 + +define void @log10_f64(double* nocapture %varray) { + ; CHECK-LABEL: @log10_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @log10(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log10_f32(float* nocapture %varray) { + ; CHECK-LABEL: @log10_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]]) 
+ ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @log10f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log2(double) #0 +declare float @log2f(float) #0 +declare double @llvm.log2.f64(double) #0 +declare float @llvm.log2.f32(float) #0 + +define void @log2_f64(double* nocapture %varray) { + ; CHECK-LABEL: @log2_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @log2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log2_f32(float* nocapture %varray) { + ; CHECK-LABEL: @log2_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @log2f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + 
ret void +} + +declare double @log(double) #0 +declare float @logf(float) #0 +declare double @llvm.log.f64(double) #0 +declare float @llvm.log.f32(float) #0 + +define void @log_f64(double* nocapture %varray) { + ; CHECK-LABEL: @log_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @log(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log_f32(float* nocapture %varray) { + ; CHECK-LABEL: @log_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @logf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @pow(double) #0 +declare float @powf(float) #0 +declare double @llvm.pow.f64(double) #0 +declare float @llvm.pow.f32(float) #0 + +define void @pow_f64(double* nocapture %varray) { + ; CHECK-LABEL: @pow_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 
%tmp to double + %call = call fast double @pow(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @pow_f32(float* nocapture %varray) { + ; CHECK-LABEL: @pow_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @powf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sin(double) #0 +declare float @sinf(float) #0 +declare double @llvm.sin.f64(double) #0 +declare float @llvm.sin.f32(float) #0 + +define void @sin_f64(double* nocapture %varray) { + ; CHECK-LABEL: @sin_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sin_f32(float* nocapture %varray) { + ; CHECK-LABEL: @sin_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]]) 
+ ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @sinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sinh(double) #0 +declare float @sinhf(float) #0 +declare double @llvm.sinh.f64(double) #0 +declare float @llvm.sinh.f32(float) #0 + +define void @sinh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @sinh_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @sinh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sinh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @sinh_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @sinhf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + 
ret void +} + +declare double @sqrt(double) #0 +declare float @sqrtf(float) #0 +declare double @llvm.sqrt.f64(double) #0 +declare float @llvm.sqrt.f32(float) #0 + +define void @sqrt_f64(double* nocapture %varray) { + ; CHECK-LABEL: @sqrt_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @sqrt(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sqrt_f32(float* nocapture %varray) { + ; CHECK-LABEL: @sqrt_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @sqrtf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tan(double) #0 +declare float @tanf(float) #0 +declare double @llvm.tan.f64(double) #0 +declare float @llvm.tan.f32(float) #0 + +define void @tan_f64(double* nocapture %varray) { + ; CHECK-LABEL: @tan_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = 
sitofp i32 %tmp to double + %call = call fast double @tan(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tan_f32(float* nocapture %varray) { + ; CHECK-LABEL: @tan_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @tanf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tanh(double) #0 +declare float @tanhf(float) #0 +declare double @llvm.tanh.f64(double) #0 +declare float @llvm.tanh.f32(float) #0 + +define void @tanh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @tanh_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @tanh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tanh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @tanh_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_tanhf(<4 x 
float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @tanhf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tgamma(double) #0 +declare float @tgammaf(float) #0 +; tgamma has no LLVM intrinsic form (there is no @llvm.tgamma.*); declaring one +; is rejected by the IR verifier, so only the libm declarations above are used. + +define void @tgamma_f64(double* nocapture %varray) { + ; CHECK-LABEL: @tgamma_f64( + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = call fast double @tgamma(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tgamma_f32(float* nocapture %varray) { + ; CHECK-LABEL: @tgamma_f32( + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = call fast float @tgammaf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, 
label %for.end, label %for.body + + for.end: + ret void +} + Index: test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll =================================================================== --- test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll +++ test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll @@ -0,0 +1,1078 @@ +; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail. +; RUN: opt -vector-library=SLEEF -loop-unroll -loop-vectorize -S < %s | FileCheck %s +; RUN: opt -vector-library=SLEEF -loop-unroll -loop-vectorize -fp-contract=fast -S < %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +declare double @acos(double) #0 +declare float @acosf(float) #0 +declare double @llvm.acos.f64(double) #0 +declare float @llvm.acos.f32(float) #0 + +define void @acos_f64(double* nocapture %varray) { + ; CHECK-LABEL: @acos_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @acos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @acos_f32(float* nocapture %varray) { + ; CHECK-LABEL: @acos_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @acosf(float %conv) + %arrayidx = getelementptr inbounds 
float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @asin(double) #0 +declare float @asinf(float) #0 +declare double @llvm.asin.f64(double) #0 +declare float @llvm.asin.f32(float) #0 + +define void @asin_f64(double* nocapture %varray) { + ; CHECK-LABEL: @asin_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @asin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @asin_f32(float* nocapture %varray) { + ; CHECK-LABEL: @asin_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @asinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atan(double) #0 +declare float @atanf(float) #0 +declare double @llvm.atan.f64(double) #0 +declare float @llvm.atan.f32(float) #0 + +define void @atan_f64(double* nocapture %varray) { + ; CHECK-LABEL: @atan_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x 
double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @atan(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan_f32(float* nocapture %varray) { + ; CHECK-LABEL: @atan_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @atanf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atan2(double) #0 +declare float @atan2f(float) #0 +declare double @llvm.atan2.f64(double) #0 +declare float @llvm.atan2.f32(float) #0 + +define void @atan2_f64(double* nocapture %varray) { + ; CHECK-LABEL: @atan2_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @atan2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 
%exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atan2_f32(float* nocapture %varray) { + ; CHECK-LABEL: @atan2_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @atan2f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @atanh(double) #0 +declare float @atanhf(float) #0 +declare double @llvm.atanh.f64(double) #0 +declare float @llvm.atanh.f32(float) #0 + +define void @atanh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @atanh_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @atanh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @atanh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @atanh_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @atanhf(float %conv) + %arrayidx = 
getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cos(double) #0 +declare float @cosf(float) #0 +declare double @llvm.cos.f64(double) #0 +declare float @llvm.cos.f32(float) #0 + +define void @cos_f64(double* nocapture %varray) { + ; CHECK-LABEL: @cos_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cos(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cos_f32(float* nocapture %varray) { + ; CHECK-LABEL: @cos_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @cosf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @cosh(double) #0 +declare float @coshf(float) #0 +declare double @llvm.cosh.f64(double) #0 +declare float @llvm.cosh.f32(float) #0 + +define void @cosh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @cosh_f64( + ; CHECK: [[TMP5:%.*]] = 
call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @cosh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @cosh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @cosh_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @coshf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp(double) #0 +declare float @expf(float) #0 +declare double @llvm.exp.f64(double) #0 +declare float @llvm.exp.f32(float) #0 + +define void @exp_f64(double* nocapture %varray) { + ; CHECK-LABEL: @exp_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @exp(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, 
label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp_f32(float* nocapture %varray) { + ; CHECK-LABEL: @exp_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @expf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp2(double) #0 +declare float @exp2f(float) #0 +declare double @llvm.exp2.f64(double) #0 +declare float @llvm.exp2.f32(float) #0 + +define void @exp2_f64(double* nocapture %varray) { + ; CHECK-LABEL: @exp2_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @exp2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp2_f32(float* nocapture %varray) { + ; CHECK-LABEL: @exp2_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @exp2f(float %conv) + %arrayidx = getelementptr inbounds float, 
float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @exp10(double) #0 +declare float @exp10f(float) #0 +declare double @llvm.exp10.f64(double) #0 +declare float @llvm.exp10.f32(float) #0 + +define void @exp10_f64(double* nocapture %varray) { + ; CHECK-LABEL: @exp10_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @exp10(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @exp10_f32(float* nocapture %varray) { + ; CHECK-LABEL: @exp10_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @exp10f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @lgamma(double) #0 +declare float @lgammaf(float) #0 +declare double @llvm.lgamma.f64(double) #0 +declare float @llvm.lgamma.f32(float) #0 + +define void @lgamma_f64(double* nocapture %varray) { + ; CHECK-LABEL: @lgamma_f64( + ; CHECK: 
[[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @lgamma(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @lgamma_f32(float* nocapture %varray) { + ; CHECK-LABEL: @lgamma_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @lgammaf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log10(double) #0 +declare float @log10f(float) #0 +declare double @llvm.log10.f64(double) #0 +declare float @llvm.log10.f32(float) #0 + +define void @log10_f64(double* nocapture %varray) { + ; CHECK-LABEL: @log10_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log10(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = 
icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log10_f32(float* nocapture %varray) { + ; CHECK-LABEL: @log10_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log10f(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log2(double) #0 +declare float @log2f(float) #0 +declare double @llvm.log2.f64(double) #0 +declare float @llvm.log2.f32(float) #0 + +define void @log2_f64(double* nocapture %varray) { + ; CHECK-LABEL: @log2_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log2(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log2_f32(float* nocapture %varray) { + ; CHECK-LABEL: @log2_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @log2f(float 
%conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @log(double) #0 +declare float @logf(float) #0 +declare double @llvm.log.f64(double) #0 +declare float @llvm.log.f32(float) #0 + +define void @log_f64(double* nocapture %varray) { + ; CHECK-LABEL: @log_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @log_f32(float* nocapture %varray) { + ; CHECK-LABEL: @log_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @logf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @pow(double, double) #0 +declare float @powf(float, float) #0 +declare double @llvm.pow.f64(double, double) #0 +declare float @llvm.pow.f32(float, float) #0 + +define void @pow_f64(double* nocapture %varray) { + ; CHECK-LABEL: @pow_f64( + ; CHECK:
[[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @pow(double %conv, double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @pow_f32(float* nocapture %varray) { + ; CHECK-LABEL: @pow_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @powf(float %conv, float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sin(double) #0 +declare float @sinf(float) #0 +declare double @llvm.sin.f64(double) #0 +declare float @llvm.sin.f32(float) #0 + +define void @sin_f64(double* nocapture %varray) { + ; CHECK-LABEL: @sin_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sin(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1
%exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sin_f32(float* nocapture %varray) { + ; CHECK-LABEL: @sin_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sinf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sinh(double) #0 +declare float @sinhf(float) #0 +declare double @llvm.sinh.f64(double) #0 +declare float @llvm.sinh.f32(float) #0 + +define void @sinh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @sinh_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sinh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sinh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @sinh_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sinhf(float %conv) + %arrayidx = getelementptr inbounds 
float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @sqrt(double) #0 +declare float @sqrtf(float) #0 +declare double @llvm.sqrt.f64(double) #0 +declare float @llvm.sqrt.f32(float) #0 + +define void @sqrt_f64(double* nocapture %varray) { + ; CHECK-LABEL: @sqrt_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @sqrt(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @sqrt_f32(float* nocapture %varray) { + ; CHECK-LABEL: @sqrt_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @sqrtf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tan(double) #0 +declare float @tanf(float) #0 +declare double @llvm.tan.f64(double) #0 +declare float @llvm.tan.f32(float) #0 + +define void @tan_f64(double* nocapture %varray) { + ; CHECK-LABEL: @tan_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> 
@_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tan(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tan_f32(float* nocapture %varray) { + ; CHECK-LABEL: @tan_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tanf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tanh(double) #0 +declare float @tanhf(float) #0 +declare double @llvm.tanh.f64(double) #0 +declare float @llvm.tanh.f32(float) #0 + +define void @tanh_f64(double* nocapture %varray) { + ; CHECK-LABEL: @tanh_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tanh(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, 
label %for.body + + for.end: + ret void +} + +define void @tanh_f32(float* nocapture %varray) { + ; CHECK-LABEL: @tanh_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tanhf(float %conv) + %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +declare double @tgamma(double) #0 +declare float @tgammaf(float) #0 +declare double @llvm.tgamma.f64(double) #0 +declare float @llvm.tgamma.f32(float) #0 + +define void @tgamma_f64(double* nocapture %varray) { + ; CHECK-LABEL: @tgamma_f64( + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @tgamma(double %conv) + %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv + store double %call, double* %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + +define void @tgamma_f32(float* nocapture %varray) { + ; CHECK-LABEL: @tgamma_f32( + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]]) + ; CHECK: ret void + ; + entry: + br label %for.body + + for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @tgammaf(float %conv) + %arrayidx = getelementptr 
inbounds float, float* %varray, i64 %iv + store float %call, float* %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + + for.end: + ret void +} + Index: unittests/Analysis/TargetLibraryInfoTest.cpp =================================================================== --- unittests/Analysis/TargetLibraryInfoTest.cpp +++ unittests/Analysis/TargetLibraryInfoTest.cpp @@ -216,6 +216,9 @@ "declare float @ldexpf(float, i32)\n" "declare x86_fp80 @ldexpl(x86_fp80, i32)\n" "declare i64 @llabs(i64)\n" + "declare double @lgamma(double)\n" + "declare float @lgammaf(float)\n" + "declare x86_fp80 @lgammal(x86_fp80)\n" "declare double @log(double)\n" "declare double @log10(double)\n" "declare float @log10f(float)\n" @@ -324,6 +327,9 @@ "declare float @tanhf(float)\n" "declare x86_fp80 @tanhl(x86_fp80)\n" "declare x86_fp80 @tanl(x86_fp80)\n" + "declare float @tgammaf(float)\n" + "declare double @tgamma(double)\n" + "declare x86_fp80 @tgammal(x86_fp80)\n" "declare i64 @times(%struct*)\n" "declare %struct* @tmpfile()\n" "declare i32 @_Z7toasciii(i32)\n" @@ -533,6 +539,12 @@ "declare double @__exp_finite(double)\n" "declare float @__expf_finite(float)\n" "declare x86_fp80 @__expl_finite(x86_fp80)\n" + "declare double @__gamma_r_finite(double, i32*)\n" + "declare float @__gammaf_r_finite(float, i32*)\n" + "declare x86_fp80 @__gammal_r_finite(x86_fp80, i32*)\n" + "declare double @__lgamma_r_finite(double, i32*)\n" + "declare float @__lgammaf_r_finite(float, i32*)\n" + "declare x86_fp80 @__lgammal_r_finite(x86_fp80, i32*)\n" "declare double @__log10_finite(double)\n" "declare float @__log10f_finite(float)\n" "declare x86_fp80 @__log10l_finite(x86_fp80)\n"