diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -20,6 +20,12 @@ // Lookup table for range reduction constants r for logarithms. extern const float R[128]; +// Lookup table for range reduction constants r for logarithms. +extern const double RD[128]; + +// Lookup table for -log(r) +extern const double LOG_R[128]; + // Lookup table for exp(m) with m = -104, ..., 89. // -104 = floor(log(single precision's min denormal)) // 89 = ceil(log(single precision's max normal)) diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -109,7 +109,7 @@ // precision, and -2^-8 <= v < 2^-7. // TODO(lntue): Add reference to how the constants are derived after the // resulting paper is ready. -const float R[128] = { +alignas(32) const float R[128] = { 0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1, 0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1, 0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1, @@ -130,6 +130,72 @@ 0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1, 0x1.02p-1, 0x1.0p-1}; +alignas(64) const double RD[128] = { + 0x1p0, 0x1.fcp-1, 0x1.f8p-1, 0x1.f4p-1, 0x1.fp-1, 0x1.ecp-1, 0x1.e8p-1, + 0x1.e4p-1, 0x1.ep-1, 0x1.dep-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d4p-1, 0x1.dp-1, + 0x1.ccp-1, 0x1.cap-1, 0x1.c6p-1, 0x1.c4p-1, 0x1.cp-1, 0x1.bep-1, 0x1.bap-1, + 0x1.b8p-1, 0x1.b4p-1, 0x1.b2p-1, 0x1.aep-1, 0x1.acp-1, 0x1.a8p-1, 0x1.a6p-1, + 0x1.a4p-1, 0x1.ap-1, 0x1.9ep-1, 0x1.9cp-1, 0x1.98p-1, 0x1.96p-1, 0x1.94p-1, + 0x1.92p-1, 0x1.9p-1, 0x1.8cp-1, 0x1.8ap-1, 0x1.88p-1, 0x1.86p-1, 0x1.84p-1, + 0x1.8p-1, 0x1.7ep-1, 0x1.7cp-1, 0x1.7ap-1, 0x1.78p-1, 0x1.76p-1, 0x1.74p-1, + 0x1.72p-1, 0x1.7p-1, 0x1.6ep-1, 0x1.6cp-1, 0x1.6ap-1, 0x1.68p-1, 0x1.66p-1, + 0x1.64p-1, 0x1.62p-1, 0x1.6p-1, 0x1.5ep-1, 0x1.5cp-1, 0x1.5ap-1, 0x1.58p-1, + 0x1.56p-1, 0x1.54p-1, 0x1.54p-1, 0x1.52p-1, 0x1.5p-1, 0x1.4ep-1, 0x1.4cp-1, + 0x1.4ap-1, 0x1.4ap-1, 0x1.48p-1, 0x1.46p-1, 0x1.44p-1, 0x1.42p-1, 0x1.4p-1, + 0x1.4p-1, 0x1.3ep-1, 0x1.3cp-1, 0x1.3ap-1, 0x1.3ap-1, 0x1.38p-1, 0x1.36p-1, + 0x1.34p-1, 0x1.34p-1, 0x1.32p-1, 0x1.3p-1, 0x1.3p-1, 0x1.2ep-1, 0x1.2cp-1, + 0x1.2cp-1, 0x1.2ap-1, 0x1.28p-1, 0x1.28p-1, 0x1.26p-1, 0x1.24p-1, 0x1.24p-1, + 0x1.22p-1, 0x1.2p-1, 0x1.2p-1, 0x1.1ep-1, 0x1.1cp-1, 0x1.1cp-1, 0x1.1ap-1, + 0x1.1ap-1, 0x1.18p-1, 0x1.16p-1, 0x1.16p-1, 0x1.14p-1, 0x1.14p-1, 0x1.12p-1, + 0x1.1p-1, 0x1.1p-1, 0x1.0ep-1, 0x1.0ep-1, 0x1.0cp-1, 0x1.0cp-1, 0x1.0ap-1, + 0x1.0ap-1, 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1, + 0x1.02p-1, 0x1.0p-1}; + +alignas(64) const double LOG_R[128] = { + 0x0.0000000000000p0, 0x1.010157588de71p-7, 0x1.0205658935847p-6, + 0x1.8492528c8cabfp-6, 0x1.0415d89e74444p-5, 0x1.466aed42de3eap-5, + 0x1.894aa149fb343p-5, 0x1.ccb73cdddb2ccp-5, 0x1.08598b59e3a07p-4, + 0x1.1973bd1465567p-4, 0x1.3bdf5a7d1ee64p-4, 0x1.5e95a4d9791cbp-4, + 0x1.700d30aeac0e1p-4, 0x1.9335e5d594989p-4, 0x1.b6ac88dad5b1cp-4, + 0x1.c885801bc4b23p-4, 0x1.ec739830a112p-4, 0x1.fe89139dbd566p-4, + 0x1.1178e8227e47cp-3, 0x1.1aa2b7e23f72ap-3, 0x1.2d1610c86813ap-3, + 0x1.365fcb0159016p-3, 0x1.4913d8333b561p-3, 0x1.527e5e4a1b58dp-3, + 0x1.6574ebe8c133ap-3, 0x1.6f0128b756abcp-3, 0x1.823c16551a3c2p-3, + 0x1.8beafeb38fe8cp-3, 0x1.95a5adcf7017fp-3, 0x1.a93ed3c8ad9e3p-3, + 0x1.b31d8575bce3dp-3, 0x1.bd087383bd8adp-3, 0x1.d1037f2655e7bp-3, + 0x1.db13db0d4894p-3, 0x1.e530effe71012p-3, 0x1.ef5ade4dcffe6p-3, + 0x1.f991c6cb3b379p-3, 0x1.07138604d5862p-2, 0x1.0c42d676162e3p-2, + 0x1.1178e8227e47cp-2, 0x1.16b5ccbacfb73p-2, 0x1.1bf99635a6b95p-2, + 0x1.269621134db92p-2, 0x1.2bef07cdc9354p-2, 0x1.314f1e1d35ce4p-2, + 0x1.36b6776be1117p-2, 0x1.3c25277333184p-2, 0x1.419b423d5e8c7p-2, + 0x1.4718dc271c41bp-2, 0x1.4c9e09e172c3cp-2, 0x1.522ae0738a3d8p-2, + 0x1.57bf753c8d1fbp-2, 0x1.5d5bddf595f3p-2, 0x1.630030b3aac49p-2, + 0x1.68ac83e9c6a14p-2, 0x1.6e60ee6af1972p-2, 0x1.741d876c67bb1p-2, + 0x1.79e26687cfb3ep-2, 0x1.7fafa3bd8151cp-2, 0x1.85855776dcbfbp-2, + 0x1.8b639a88b2df5p-2, 0x1.914a8635bf68ap-2, 0x1.973a3431356aep-2, + 0x1.9d32bea15ed3bp-2, 0x1.a33440224fa79p-2, 0x1.a33440224fa79p-2, + 0x1.a93ed3c8ad9e3p-2, 0x1.af5295248cddp-2, 0x1.b56fa04462909p-2, + 0x1.bb9611b80e2fbp-2, 0x1.c1c60693fa39ep-2, 0x1.c1c60693fa39ep-2, + 0x1.c7ff9c74554c9p-2, 0x1.ce42f18064743p-2, 0x1.d490246defa6bp-2, + 0x1.dae75484c9616p-2, 0x1.e148a1a2726cep-2, 0x1.e148a1a2726cep-2, + 0x1.e7b42c3ddad73p-2, 0x1.ee2a156b413e5p-2, 0x1.f4aa7ee03192dp-2, + 0x1.f4aa7ee03192dp-2, 0x1.fb358af7a4884p-2, 0x1.00e5ae5b207abp-1, + 0x1.04360be7603adp-1, 0x1.04360be7603adp-1, 0x1.078bf0533c568p-1, + 0x1.0ae76e2d054fap-1, 0x1.0ae76e2d054fap-1, 0x1.0e4898611cce1p-1, + 0x1.11af823c75aa8p-1, 0x1.11af823c75aa8p-1, 0x1.151c3f6f29612p-1, + 0x1.188ee40f23ca6p-1, 0x1.188ee40f23ca6p-1, 0x1.1c07849ae6007p-1, + 0x1.1f8635fc61659p-1, 0x1.1f8635fc61659p-1, 0x1.230b0d8bebc98p-1, + 0x1.269621134db92p-1, 0x1.269621134db92p-1, 0x1.2a2786d0ec107p-1, + 0x1.2dbf557b0df43p-1, 0x1.2dbf557b0df43p-1, 0x1.315da4434068bp-1, + 0x1.315da4434068bp-1, 0x1.35028ad9d8c86p-1, 0x1.38ae2171976e7p-1, + 0x1.38ae2171976e7p-1, 0x1.3c6080c36bfb5p-1, 0x1.3c6080c36bfb5p-1, + 0x1.4019c2125ca93p-1, 0x1.43d9ff2f923c5p-1, 0x1.43d9ff2f923c5p-1, + 0x1.47a1527e8a2d3p-1, 0x1.47a1527e8a2d3p-1, 0x1.4b6fd6f970c1fp-1, + 0x1.4b6fd6f970c1fp-1, 0x1.4f45a835a4e19p-1, 0x1.4f45a835a4e19p-1, + 0x1.5322e26867857p-1, 0x1.5322e26867857p-1, 0x1.5707a26bb8c66p-1, + 0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1, 0x1.5af405c3649ep-1, + 0x1.5ee82aa24192p-1, 0x0.000000000000p0}; + // Lookup table for exp(m) with m = -104, ..., 89. // -104 = floor(log(single precision's min denormal)) // 89 = ceil(log(single precision's max normal)) diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp --- a/libc/src/math/generic/logf.cpp +++ b/libc/src/math/generic/logf.cpp @@ -57,81 +57,115 @@ FPBits xbits(x); uint32_t x_u = xbits.uintval(); + int m = -FPBits::EXPONENT_BIAS; + using fputil::round_result_slightly_down; using fputil::round_result_slightly_up; - switch (x_u) { - case 0x3f800001U: // x = 0x1.000002p+0f - return round_result_slightly_up(0x1.fffffep-24f); - case 0x41178febU: // x = 0x1.2f1fd6p+3f - return round_result_slightly_up(0x1.1fcbcep+1f); - case 0x4c5d65a5U: // x = 0x1.bacb4ap+25f - return round_result_slightly_down(0x1.1e0696p+4f); - case 0x500ffb03U: // x = 0x1.1ff606p+33f - return round_result_slightly_up(0x1.6fdd34p+4f); - case 0x5cd69e88U: // x = 0x1.ad3d1p+58f - return round_result_slightly_up(0x1.45c146p+5f); - case 0x65d890d3U: // x = 0x1.b121a6p+76f - return round_result_slightly_down(0x1.a9a3f2p+5f); - case 0x6f31a8ecU: // x = 0x1.6351d8p+95f - return round_result_slightly_down(0x1.08b512p+6f); - case 0x7a17f30aU: // x = 0x1.2fe614p+117f - return round_result_slightly_up(0x1.451436p+6f); -#ifndef LIBC_TARGET_CPU_HAS_FMA - case 0x1b7679ffU: // x = 0x1.ecf3fep-73f - return round_result_slightly_up(-0x1.8f8e5ap+5f); - case 0x1e88452dU: // x = 0x1.108a5ap-66f - return round_result_slightly_up(-0x1.6d7b18p+5f); - case 0x5ee8984eU: // x = 0x1.d1309cp+62f; - return round_result_slightly_up(0x1.5c9442p+5f); - case 0x665e7ca6U: // x = 0x1.bcf94cp+77f - return round_result_slightly_up(0x1.af66cp+5f); - case 0x79e7ec37U: // x = 0x1.cfd86ep+116f - return round_result_slightly_up(0x1.43ff6ep+6f); + // Small inputs + if (x_u < 0x4c5d65a5U) { + // Hard-to-round cases. + switch (x_u) { + case 0x3f7f4d6fU: // x = 0x1.fe9adep-1f + return round_result_slightly_up(-0x1.659ec8p-9f); + case 0x41178febU: // x = 0x1.2f1fd6p+3f + return round_result_slightly_up(0x1.1fcbcep+1f); +#ifdef LIBC_TARGET_CPU_HAS_FMA + case 0x3f800000U: // x = 1.0f + return 0.0f; +#else + case 0x1e88452dU: // x = 0x1.108a5ap-66f + return round_result_slightly_up(-0x1.6d7b18p+5f); #endif // LIBC_TARGET_CPU_HAS_FMA - } - - int m = -FPBits::EXPONENT_BIAS; - - if (LIBC_UNLIKELY(x_u < FPBits::MIN_NORMAL || x_u > FPBits::MAX_NORMAL)) { - if (xbits.is_zero()) { - // Return -inf and raise FE_DIVBYZERO - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_DIVBYZERO); - return static_cast(FPBits::neg_inf()); } - if (xbits.get_sign() && !xbits.is_nan()) { - // Return NaN and raise FE_INVALID - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - return FPBits::build_quiet_nan(0); + // Subnormal inputs. + if (LIBC_UNLIKELY(x_u < FPBits::MIN_NORMAL)) { + if (x_u == 0) { + // Return -inf and raise FE_DIVBYZERO + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_DIVBYZERO); + return static_cast(FPBits::neg_inf()); + } + // Normalize denormal inputs. + xbits.set_val(xbits.get_val() * 0x1.0p23f); + m -= 23; + x_u = xbits.uintval(); + } + } else { + // Hard-to-round cases. + switch (x_u) { + case 0x4c5d65a5U: // x = 0x1.bacb4ap+25f + return round_result_slightly_down(0x1.1e0696p+4f); + case 0x65d890d3U: // x = 0x1.b121a6p+76f + return round_result_slightly_down(0x1.a9a3f2p+5f); + case 0x6f31a8ecU: // x = 0x1.6351d8p+95f + return round_result_slightly_down(0x1.08b512p+6f); + case 0x7a17f30aU: // x = 0x1.2fe614p+117f + return round_result_slightly_up(0x1.451436p+6f); +#ifndef LIBC_TARGET_CPU_HAS_FMA + case 0x500ffb03U: // x = 0x1.1ff606p+33f + return round_result_slightly_up(0x1.6fdd34p+4f); + case 0x5cd69e88U: // x = 0x1.ad3d1p+58f + return round_result_slightly_up(0x1.45c146p+5f); + case 0x5ee8984eU: // x = 0x1.d1309cp+62f; + return round_result_slightly_up(0x1.5c9442p+5f); +#endif // LIBC_TARGET_CPU_HAS_FMA } - if (xbits.is_inf_or_nan()) { + // Exceptional inputs. + if (LIBC_UNLIKELY(x_u > FPBits::MAX_NORMAL)) { + if (x_u == 0x8000'0000U) { + // Return -inf and raise FE_DIVBYZERO + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_DIVBYZERO); + return static_cast(FPBits::neg_inf()); + } + if (xbits.get_sign() && !xbits.is_nan()) { + // Return NaN and raise FE_INVALID + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + return FPBits::build_quiet_nan(0); + } + // x is +inf or nan return x; } - // Normalize denormal inputs. - xbits.set_val(xbits.get_val() * 0x1.0p23f); - m -= 23; } - m += xbits.get_unbiased_exponent(); - int f_index = xbits.get_mantissa() >> 16; - // Set bits to 1.m - xbits.set_unbiased_exponent(0x7F); - - FPBits f = xbits; - f.bits &= ~0x0000'FFFF; +#ifndef LIBC_TARGET_CPU_HAS_FMA + // Returning the correct +0 when x = 1.0 for non-FMA targets with FE_DOWNWARD + // rounding mode. + if (LIBC_UNLIKELY((x_u & 0x007f'ffffU) == 0)) + return static_cast( + static_cast(m + xbits.get_unbiased_exponent()) * LOG_2); +#endif // LIBC_TARGET_CPU_HAS_FMA - double d = static_cast(xbits) - static_cast(f); - d *= ONE_OVER_F[f_index]; + uint32_t mant = xbits.get_mantissa(); + // Extract 7 leading fractional bits of the mantissa + int index = mant >> 16; + // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are + // all 1's. + m += static_cast((x_u + (1 << 16)) >> 23); - double extra_factor = - fputil::multiply_add(static_cast(m), LOG_2, LOG_F[f_index]); + // Set bits to 1.m + xbits.set_unbiased_exponent(0x7F); - double r = __llvm_libc::fputil::polyeval( - d, extra_factor, 0x1.fffffffffffacp-1, -0x1.fffffffef9cb2p-2, - 0x1.5555513bc679ap-2, -0x1.fff4805ea441p-3, 0x1.930180dbde91ap-3); + float u = static_cast(xbits); + double v; +#ifdef LIBC_TARGET_CPU_HAS_FMA + v = static_cast(fputil::multiply_add(u, R[index], -1.0f)); // Exact. +#else + v = fputil::multiply_add(static_cast(u), RD[index], -1.0); // Exact +#endif // LIBC_TARGET_CPU_HAS_FMA + // Degree-5 polynomial approximation of log generated by Sollya with: + // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]); + constexpr double COEFFS[4] = {-0x1.000000000fe63p-1, 0x1.555556e963c16p-2, + -0x1.000028dedf986p-2, 0x1.966681bfda7f7p-3}; + double v2 = v * v; // Exact + double p2 = fputil::multiply_add(v, COEFFS[3], COEFFS[2]); + double p1 = fputil::multiply_add(v, COEFFS[1], COEFFS[0]); + double p0 = LOG_R[index] + v; + double r = fputil::multiply_add(static_cast(m), LOG_2, + fputil::polyeval(v2, p0, p1, p2)); return static_cast(r); } diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp --- a/libc/test/src/math/logf_test.cpp +++ b/libc/test/src/math/logf_test.cpp @@ -26,7 +26,7 @@ EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::logf(0.0f), FE_DIVBYZERO); EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::logf(-0.0f), FE_DIVBYZERO); EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::logf(-1.0f), FE_INVALID); - EXPECT_FP_EQ(zero, __llvm_libc::logf(1.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, __llvm_libc::logf(1.0f)); } TEST(LlvmLibcLogfTest, TrickyInputs) {