diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp --- a/libc/src/math/generic/coshf.cpp +++ b/libc/src/math/generic/coshf.cpp @@ -23,13 +23,13 @@ uint32_t x_u = xbits.uintval(); - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_u <= 0x3280'0000U)) { - return 1.0f + x; - } - // When |x| >= 90, or x is inf or nan - if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U)) { + if (LIBC_UNLIKELY(x_u >= 0x42b4'0000U || x_u <= 0x3280'0000U)) { + // |x| <= 2^-26 + if (x_u <= 0x3280'0000U) { + return 1.0f + x; + } + if (xbits.is_inf_or_nan()) return x + FPBits::inf().get_val(); diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -16,6 +16,7 @@ #include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" +#include "src/__support/macros/properties/cpu_features.h" #include @@ -210,13 +211,24 @@ template LIBC_INLINE double exp_pm_eval(float x) { double xd = static_cast(x); - // round(x * log2(e) * 2^5) - double kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); - + // kd = round(x * log2(e) * 2^5) // k_p = round(x * log2(e) * 2^5) - int k_p = static_cast(kd); // k_m = round(-x * log2(e) * 2^5) - int k_m = -k_p; + double kd; + int k_p, k_m; + +#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT + kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); + k_p = static_cast(kd); + k_m = -k_p; +#else + constexpr double HALF_WAY[2] = {0.5, -0.5}; + + k_p = static_cast( + fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); + k_m = -k_p; + kd = static_cast(k_p); +#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT // hi = floor(kf * 2^(-5)) // exp_hi = shift hi to the exponent field of double precision. @@ -243,19 +255,19 @@ double dx2 = dx * dx; // c0 = 1 + COEFFS[0] * lo^2 - // P_even = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4 - double p_even = - fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[0], ExpBase::COEFFS[2]); - // P_odd = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4 - double p_odd = - fputil::polyeval(dx2, 1.0, ExpBase::COEFFS[1], ExpBase::COEFFS[3]); + // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 + double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, + ExpBase::COEFFS[2] * 0.5); + // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 + double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, + ExpBase::COEFFS[3] * 0.5); double r; if constexpr (is_sinh) r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); else r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); - return 0.5 * r; + return r; } // x should be positive, normal finite value diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp --- a/libc/src/math/generic/sinhf.cpp +++ b/libc/src/math/generic/sinhf.cpp @@ -17,23 +17,43 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); - bool sign = xbits.get_sign(); uint32_t x_abs = xbits.uintval() & FPBits::FloatProp::EXP_MANT_MASK; - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast( - LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x)); - } - // When |x| >= 90, or x is inf or nan - if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U)) { + if (LIBC_UNLIKELY(x_abs >= 0x42b4'0000U || x_abs <= 0x3da0'0000U)) { + // |x| <= 0.078125 + if (x_abs <= 0x3da0'0000U) { + // |x| = 0.0005589424981735646724700927734375 + if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) { + if (fputil::fenv_is_round_to_nearest()) + return x; + } + + // |x| <= 2^-26 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + return static_cast( + LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x)); + } + + double xdbl = x; + double x2 = xdbl * xdbl; + // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x); + // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1 + // * (0x1.111110d239f1fp-7 + // + x^0x1p1 * 0x1.a02b5a284013cp-13))) + // Therefore, output of Sollya = x * pe; + double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3, + 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); + } + if (xbits.is_nan()) return x + 1.0f; // sNaN to qNaN + signal if (xbits.is_inf()) return x; + bool sign = xbits.get_sign(); int rounding = fputil::quick_get_round(); if (sign) { if (LIBC_UNLIKELY(rounding == FE_UPWARD || rounding == FE_TOWARDZERO)) @@ -50,26 +70,6 @@ return x + FPBits::inf(sign).get_val(); } - // |x| <= 0.078125 - if (LIBC_UNLIKELY(x_abs <= 0x3da0'0000U)) { - // |x| = 0.0005589424981735646724700927734375 - if (LIBC_UNLIKELY(x_abs == 0x3a12'85ffU)) { - if (fputil::fenv_is_round_to_nearest()) - return x; - } - - double xdbl = x; - double x2 = xdbl * xdbl; - // Sollya: fpminimax(sinh(x),[|3,5,7|],[|D...|],[-1/16-1/64;1/16+1/64],x); - // Sollya output: x * (0x1p0 + x^0x1p1 * (0x1.5555555556583p-3 + x^0x1p1 - // * (0x1.111110d239f1fp-7 - // + x^0x1p1 * 0x1.a02b5a284013cp-13))) - // Therefore, output of Sollya = x * pe; - double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3, - 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13); - return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); - } - // sinh(x) = (e^x - e^(-x)) / 2. return static_cast(exp_pm_eval(x)); }