diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h @@ -21,14 +21,55 @@ namespace __llvm_libc { -static constexpr int EXP_bits_p = 5; -static constexpr int EXP_num_p = 1 << EXP_bits_p; -constexpr double mlp = EXP_num_p; -constexpr double mmld = -1.0 / mlp; - -// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27] -// printf("%.13a,\n", d[i]); -extern const double EXP_2_POW[EXP_num_p]; +struct ExpBase { + // Base = e + static constexpr int MID_BITS = 3; + static constexpr int MID_MASK = (1 << MID_BITS) - 1; + // log2(e) * 2^3 + static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS); + // High and low parts of -log(2) * 2^(-3) + static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS); + static constexpr double M_LOGB_2_LO = + -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS); + // Look up table for bit fields of 2^(i/8) for i = 0..7, generated by Sollya + // with: + // > for i from 0 to 7 do printdouble(round(2^(i/8), D, RN)); + static constexpr int64_t EXP_2_MID[1 << MID_BITS] = { + 0x3ff0000000000000, 0x3ff172b83c7d517b, 0x3ff306fe0a31b715, + 0x3ff4bfdad5362a27, 0x3ff6a09e667f3bcd, 0x3ff8ace5422aa0db, + 0x3ffae89f995ad3ad, 0x3ffd5818dcfba487, + }; + + // Approximating e^dx with degree-7 minimax polynomial generated by Sollya: + // > Q = fpminimax(expm1(x)/x, 6, [|1, D...|], [-log(2)/16, log(2)/16]); + // Then: + // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7. 
+ static constexpr double COEFFS[6] = { + 0x1.00000000000a1p-1, 0x1.5555555555434p-3, 0x1.5555554ad7591p-5, + 0x1.11111115caf71p-7, 0x1.6c1c38380bd59p-10, 0x1.a01b5f9d8b1d3p-13}; + + static constexpr double powb_lo(double dx) { + using fputil::multiply_add; + double dx2 = dx * dx; + double c0 = 1.0 + dx; + // c1 = COEFFS[0] + COEFFS[1] * dx + double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]); + // c2 = COEFFS[2] + COEFFS[3] * dx + double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]); + // c3 = COEFFS[4] + COEFFS[5] * dx + double c3 = multiply_add(dx, ExpBase::COEFFS[5], ExpBase::COEFFS[4]); + double dx4 = dx2 * dx2; + // c4 = c0 + c1 * dx^2 + // = 1 + dx + COEFFS[0] * dx^2 + COEFFS[1] * dx^3 + double c4 = multiply_add(dx2, c1, c0); + // c5 = c2 + c3 * dx^2 + // = COEFFS[2] + COEFFS[3] * dx + COEFFS[4] * dx^2 + COEFFS[5] * dx^3 + double c5 = multiply_add(dx2, c3, c2); + // r = c4 + c5 * dx^4 + // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7 + return multiply_add(dx4, c5, c4); + } +}; // Look up table for bit fields of 2^(i/16) for i = 0..15, generated by Sollya // with: @@ -55,65 +96,61 @@ extern const double K_LOG2_ODD[4]; extern const double K_LOG2_EVEN[4]; -// The algorithm represents exp(x) as -// exp(x) = 2^(ln(2) * i) * 2^(ln(2) * j / NUM_P )) * exp(dx) -// where i integer value, j integer in range [-NUM_P/2, NUM_P/2). -// 2^(ln(2) * j / NUM_P )) is a table values: 1.0 + EXP_M -// exp(dx) calculates by taylor expansion. - -// Inversion of ln(2). Multiplication by EXP_num_p due to sampling by 1 / -// EXP_num_p Precise value of the constant is not needed. -static constexpr double LN2_INV = 0x1.71547652b82fep+0 * EXP_num_p; - // log2(e) * 2^4 static constexpr double LOG2_E_4 = 0x1.71547652b82fep+4; -// LN2_HIGH + LN2_LOW = ln(2) with precision higher than double(ln(2)) -// Minus sign is to use FMA directly. 
-static constexpr double LN2_HIGH = -0x1.62e42fefa0000p-1 / EXP_num_p; -static constexpr double LN2_LOW = -0x1.cf79abc9e3b3ap-40 / EXP_num_p; - // -log(2) * 2^(-4) static constexpr double M_LN2_4_HI = -0x1.62e42fefa0000p-5; static constexpr double M_LN2_4_LO = -0x1.cf79abc9e3b3ap-44; -struct exe_eval_result_t { - // exp(x) = 2^MULT_POWER2 * mult_exp * (r + 1.0) - // where - // MULT_POWER2 template parameter; +struct exp_b_result_t { + // b^x = mult_exp * r + // where: // mult_exp = 2^e; - // r in range [~-0.3, ~0.41] + // r = the remaining factor; exp_b_eval fills it with b^lo double mult_exp; double r; }; -// The function correctly calculates exp value with at least float precision -// in range not narrow than [-log(2^-150), 90] -template -inline static exe_eval_result_t exp_eval(double x) { - double ps_dbl = fputil::nearest_integer(LN2_INV * x); - // Negative sign due to multiply_add optimization - double mult_e1, ml; - { - int ps = - static_cast(ps_dbl) + (1 << (EXP_bits_p - 1)) + - ((fputil::FPBits::EXPONENT_BIAS + MULT_POWER2) << EXP_bits_p); - int table_index = ps & (EXP_num_p - 1); - fputil::FPBits bs; - bs.set_unbiased_exponent(ps >> EXP_bits_p); - ml = EXP_2_POW[table_index]; - mult_e1 = bs.get_val(); - } - double dx = fputil::multiply_add(ps_dbl, LN2_LOW, - fputil::multiply_add(ps_dbl, LN2_HIGH, x)); - - // Taylor series coefficients - double pe = dx * fputil::polyeval(dx, 1.0, 0x1.0p-1, 0x1.5555555555555p-3, - 0x1.5555555555555p-5, 0x1.1111111111111p-7, - 0x1.6c16c16c16c17p-10); - - double r = fputil::multiply_add(ml, pe, pe) + ml; - return {mult_e1, r}; +// The function correctly calculates b^x value with at least float precision +// in a limited range. 
+// Range reduction: +// b^x = 2^(hi + mid) * b^lo +// where: +// x = (hi + mid) * log_b(2) + lo +// hi is an integer, +// 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer +// -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1) +// Base class needs to provide the following constants and function: +// - MID_BITS : number of bits after the binary point used for mid +// - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits +// - LOG2_B : log2(b) * 2^MID_BITS for scaling +// - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS) +// - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS) +// - EXP_2_MID : look up table for bit fields of 2^mid +// - powb_lo : approximating b^lo. +// Return: +// { 2^(hi + mid), b^lo } +template <class Base> static inline exp_b_result_t exp_b_eval(float x) { + double xd = static_cast<double>(x); + // kd = round(x * log2(b) * 2^MID_BITS) = (hi + mid) * 2^MID_BITS + double kd = fputil::nearest_integer(Base::LOG2_B * xd); + // k = kd as an integer (x is assumed to stay in the limited range where kd fits in int) + int k = static_cast<int>(kd); + // hi = floor(kd * 2^(-MID_BITS)) + // exp_hi = hi shifted into the exponent field of double precision; the shift is done on an unsigned value because left-shifting a negative signed integer is undefined behavior before C++20. 
+ int64_t exp_hi = static_cast<int64_t>(static_cast<uint64_t>(k >> Base::MID_BITS) + << fputil::FloatProperties<double>::MANTISSA_WIDTH); + // mh = 2^hi * 2^mid + // mh_bits = bit field of mh + int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi; + double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val(); + // dx = lo = x - (hi + mid) * log_b(2) + double dx = fputil::multiply_add( + kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd)); + // r = b^lo + double r = Base::powb_lo(dx); + return {mh, r}; } // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) diff --git a/libc/src/math/generic/explogxf.cpp b/libc/src/math/generic/explogxf.cpp --- a/libc/src/math/generic/explogxf.cpp +++ b/libc/src/math/generic/explogxf.cpp @@ -10,21 +10,6 @@ namespace __llvm_libc { -// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27] -// printf("%.13a,\n", d[i]); -alignas(64) const double EXP_2_POW[EXP_num_p] = { - -0x1.2bec333018867p-2, -0x1.1c1142e274118p-2, -0x1.0bdd71829fcf2p-2, - -0x1.f69d99accc7b6p-3, -0x1.d4c6af7557c93p-3, -0x1.b23213cc8e86cp-3, - -0x1.8edb9f5703dc0p-3, -0x1.6abf137076a8ep-3, -0x1.45d819a94b14bp-3, - -0x1.20224341286e4p-3, -0x1.f332113d56b1fp-4, -0x1.a46f918837cb7p-4, - -0x1.53f391822dbc7p-4, -0x1.01b466423250ap-4, -0x1.5b505d5b6f268p-5, - -0x1.5f134923757f3p-6, 0x0.0000000000000p+0, 0x1.66c34c5615d0fp-6, - 0x1.6ab0d9f3121ecp-5, 0x1.1301d0125b50ap-4, 0x1.72b83c7d517aep-4, - 0x1.d4873168b9aa8p-4, 0x1.1c3d373ab11c3p-3, 0x1.4f4efa8fef709p-3, - 0x1.837f0518db8a9p-3, 0x1.b8d39b9d54e55p-3, 0x1.ef5326091a112p-3, - 0x1.13821818624b4p-2, 0x1.2ff6b54d8a89cp-2, 0x1.4d0ad5a753e07p-2, - 0x1.6ac1f752150a5p-2, 0x1.891fac0e95613p-2}; - // N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40] alignas(64) const double LOG_P1_LOG2[LOG_P1_SIZE] = { 0x0.0000000000000p+0, 0x1.6e79685c2d22ap-6, 0x1.6bad3758efd87p-5, diff --git a/libc/src/math/generic/tanhf.cpp b/libc/src/math/generic/tanhf.cpp --- a/libc/src/math/generic/tanhf.cpp +++ b/libc/src/math/generic/tanhf.cpp 
@@ -53,10 +53,15 @@ return FPBits(0x3f7f'6ad9U).get_val(); } - auto ep = exp_eval(2.0f * (sign ? x : -x)); // exp(-2 * x) - double result = fputil::multiply_add(ep.mult_exp, ep.r, ep.mult_exp - 1.0) / - (fputil::multiply_add(ep.mult_exp, ep.r, ep.mult_exp + 1.0)); - return sign ? result : -result; + // tanh(x) = (exp(2x) - 1) / (exp(2x) + 1) + auto ep = exp_b_eval(2.0f * x); // exp(2 * x) +#if defined(LIBC_TARGET_HAS_FMA) + return fputil::multiply_add(ep.mult_exp, ep.r, -1.0) / + fputil::multiply_add(ep.mult_exp, ep.r, 1.0); +#else + double exp_x = ep.mult_exp * ep.r; + return (exp_x - 1.0) / (exp_x + 1.0); +#endif // LIBC_TARGET_HAS_FMA } } // namespace __llvm_libc diff --git a/libc/test/src/math/explogxf_test.cpp b/libc/test/src/math/explogxf_test.cpp --- a/libc/test/src/math/explogxf_test.cpp +++ b/libc/test/src/math/explogxf_test.cpp @@ -27,9 +27,8 @@ TEST(LlvmLibcExpxfTest, InFloatRange) { auto fx = [](float x) -> float { - auto result = __llvm_libc::exp_eval<-1>(x); - return static_cast(2 * result.mult_exp * result.r + - 2 * result.mult_exp); + auto result = __llvm_libc::exp_b_eval<__llvm_libc::ExpBase>(x); + return static_cast(result.mult_exp * result.r); }; auto f_check = [](float x) -> bool { return !(