diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -17,6 +17,9 @@ // Lookup table for log(f) = log(1 + n*2^(-7)) where n = 0..127. extern const double LOG_F[128]; +// Lookup table for range reduction constants r for logarithms. +extern const double R[128]; + // Lookup table for exp(m) with m = -104, ..., 89. // -104 = floor(log(single precision's min denormal)) // 89 = ceil(log(single precision's max normal)) diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -102,6 +102,37 @@ 0x1.58cadb5cd7989p-1, 0x1.5ad404c359f2cp-1, 0x1.5cdb1dc6c1764p-1, 0x1.5ee02a9241675p-1, 0x1.60e32f44788d8p-1}; +// Range reduction constants for logarithms. +// r(0) = 1, r(127) = 0.5 +// r(k) = 2^-10 * ceil(2^10 * (1 - 2^-8) / (1 + k*2^-7)) +// The constants are chosen so that v = r*m_x - 1 and v^2 are exact in +// double precision, even without FMA instructions, and -2^-8 <= v < 2^-7. +// TODO(lntue): Add reference to how the constants are derived after the +// resulting paper is ready. +const double R[128] = { + 0x1p0, 0x1.fa8p-1, 0x1.f68p-1, 0x1.f28p-1, 0x1.efp-1, 0x1.ebp-1, + 0x1.e78p-1, 0x1.e4p-1, 0x1.ep-1, 0x1.dc8p-1, 0x1.d98p-1, 0x1.d6p-1, + 0x1.d28p-1, 0x1.cfp-1, 0x1.ccp-1, 0x1.c9p-1, 0x1.c58p-1, 0x1.c28p-1, + 0x1.bf8p-1, 0x1.bc8p-1, 0x1.b98p-1, 0x1.b68p-1, 0x1.b38p-1, 0x1.b08p-1, + 0x1.ad8p-1, 0x1.abp-1, 0x1.a8p-1, 0x1.a58p-1, 0x1.a28p-1, 0x1.ap-1, + 0x1.9d8p-1, 0x1.9bp-1, 0x1.98p-1, 0x1.958p-1, 0x1.93p-1, 0x1.908p-1, + 0x1.8e8p-1, 0x1.8cp-1, 0x1.898p-1, 0x1.87p-1, 0x1.85p-1, 0x1.828p-1, + 0x1.8p-1, 0x1.7ep-1, 0x1.7cp-1, 0x1.798p-1, 0x1.778p-1, 0x1.758p-1, + 0x1.73p-1, 0x1.71p-1, 0x1.6fp-1, 0x1.6dp-1, 0x1.6bp-1, 0x1.69p-1, + 0x1.67p-1, 0x1.65p-1, 0x1.63p-1, 0x1.61p-1, 0x1.5fp-1, 0x1.5d8p-1, + 0x1.5b8p-1, 0x1.598p-1, 0x1.58p-1, 0x1.56p-1, 0x1.54p-1, 0x1.528p-1, + 0x1.508p-1, 0x1.4fp-1, 0x1.4d8p-1, 0x1.4b8p-1, 0x1.4ap-1, 0x1.488p-1, + 0x1.468p-1, 0x1.45p-1, 0x1.438p-1, 0x1.42p-1, 0x1.4p-1, 0x1.3e8p-1, + 0x1.3dp-1, 0x1.3b8p-1, 0x1.3ap-1, 0x1.388p-1, 0x1.37p-1, 0x1.358p-1, + 0x1.34p-1, 0x1.328p-1, 0x1.318p-1, 0x1.3p-1, 0x1.2e8p-1, 0x1.2dp-1, + 0x1.2b8p-1, 0x1.2a8p-1, 0x1.29p-1, 0x1.278p-1, 0x1.268p-1, 0x1.25p-1, + 0x1.238p-1, 0x1.228p-1, 0x1.21p-1, 0x1.2p-1, 0x1.1e8p-1, 0x1.1d8p-1, + 0x1.1cp-1, 0x1.1bp-1, 0x1.198p-1, 0x1.188p-1, 0x1.17p-1, 0x1.16p-1, + 0x1.15p-1, 0x1.138p-1, 0x1.128p-1, 0x1.118p-1, 0x1.1p-1, 0x1.0fp-1, + 0x1.0ep-1, 0x1.0dp-1, 0x1.0cp-1, 0x1.0a8p-1, 0x1.098p-1, 0x1.088p-1, + 0x1.078p-1, 0x1.068p-1, 0x1.058p-1, 0x1.048p-1, 0x1.038p-1, 0x1.028p-1, + 0x1.018p-1, 0x1.0p-1}; + // Lookup table for exp(m) with m = -104, ..., 89. // -104 = floor(log(single precision's min denormal)) // 89 = ceil(log(single precision's max normal)) diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp --- a/libc/src/math/generic/log10f.cpp +++ b/libc/src/math/generic/log10f.cpp @@ -56,53 +56,51 @@ namespace __llvm_libc { -// Exact power of 10 in float: - -// Lookup table for log10(f) = log10(1 + n*2^(-7)) where n = 0..127. -static constexpr double LOG10_F[128] = { - 0x0.0000000000000p+0, 0x1.bafd47221ed26p-9, 0x1.b9476a4fcd10fp-8, - 0x1.49b0851443684p-7, 0x1.b5e908eb13790p-7, 0x1.10a83a8446c78p-6, - 0x1.45f4f5acb8be0p-6, 0x1.7adc3df3b1ff8p-6, 0x1.af5f92b00e610p-6, - 0x1.e3806acbd058fp-6, 0x1.0ba01a8170000p-5, 0x1.25502c0fc314cp-5, - 0x1.3ed1199a5e425p-5, 0x1.58238eeb353dap-5, 0x1.71483427d2a99p-5, - 0x1.8a3fadeb847f4p-5, 0x1.a30a9d609efeap-5, 0x1.bba9a058dfd84p-5, - 0x1.d41d5164facb4p-5, 0x1.ec6647eb58808p-5, 0x1.02428c1f08016p-4, - 0x1.0e3d29d81165ep-4, 0x1.1a23445501816p-4, 0x1.25f5215eb594ap-4, - 0x1.31b3055c47118p-4, 0x1.3d5d335c53179p-4, 0x1.48f3ed1df48fbp-4, - 0x1.5477731973e85p-4, 0x1.5fe80488af4fdp-4, 0x1.6b45df6f3e2c9p-4, - 0x1.769140a2526fdp-4, 0x1.81ca63d05a44ap-4, 0x1.8cf183886480dp-4, - 0x1.9806d9414a209p-4, 0x1.a30a9d609efeap-4, 0x1.adfd07416be07p-4, - 0x1.b8de4d3ab3d98p-4, 0x1.c3aea4a5c6effp-4, 0x1.ce6e41e463da5p-4, - 0x1.d91d5866aa99cp-4, 0x1.e3bc1ab0e19fep-4, 0x1.ee4aba610f204p-4, - 0x1.f8c9683468191p-4, 0x1.019c2a064b486p-3, 0x1.06cbd67a6c3b6p-3, - 0x1.0bf3d0937c41cp-3, 0x1.11142f0811357p-3, 0x1.162d082ac9d10p-3, - 0x1.1b3e71ec94f7bp-3, 0x1.204881dee8777p-3, 0x1.254b4d35e7d3cp-3, - 0x1.2a46e8ca7ba2ap-3, 0x1.2f3b691c5a001p-3, 0x1.3428e2540096dp-3, - 0x1.390f6844a0b83p-3, 0x1.3def0e6dfdf85p-3, 0x1.42c7e7fe3fc02p-3, - 0x1.479a07d3b6411p-3, 0x1.4c65807e93338p-3, 0x1.512a644296c3dp-3, - 0x1.55e8c518b10f8p-3, 0x1.5aa0b4b0988fap-3, 0x1.5f52447255c92p-3, - 0x1.63fd857fc49bbp-3, 0x1.68a288b60b7fcp-3, 0x1.6d415eaf0906bp-3, - 0x1.71da17c2b7e80p-3, 0x1.766cc40889e85p-3, 0x1.7af97358b9e04p-3, - 0x1.7f80354d952a0p-3, 0x1.84011944bcb75p-3, 0x1.887c2e605e119p-3, - 0x1.8cf183886480dp-3, 0x1.9161276ba2978p-3, 0x1.95cb2880f45bap-3, - 0x1.9a2f95085a45cp-3, 0x1.9e8e7b0c0d4bep-3, 0x1.a2e7e8618c2d2p-3, - 0x1.a73beaaaa22f4p-3, 0x1.ab8a8f56677fcp-3, 0x1.afd3e3a23b680p-3, - 0x1.b417f49ab8807p-3, 0x1.b856cf1ca3105p-3, 0x1.bc907fd5d1c40p-3, - 0x1.c0c5134610e26p-3, 0x1.c4f495c0002a2p-3, 0x1.c91f1369eb7cap-3, - 0x1.cd44983e9e7bdp-3, 0x1.d165300e333f7p-3, 0x1.d580e67edc43dp-3, - 0x1.d997c70da9b47p-3, 0x1.dda9dd0f4a329p-3, 0x1.e1b733b0c7381p-3, - 0x1.e5bfd5f83d342p-3, 0x1.e9c3cec58f807p-3, 0x1.edc328d3184afp-3, - 0x1.f1bdeeb654901p-3, 0x1.f5b42ae08c407p-3, 0x1.f9a5e79f76ac5p-3, - 0x1.fd932f1ddb4d6p-3, 0x1.00be05b217844p-2, 0x1.02b0432c96ff0p-2, - 0x1.04a054e139004p-2, 0x1.068e3fa282e3dp-2, 0x1.087a0832fa7acp-2, - 0x1.0a63b3456c819p-2, 0x1.0c4b457d3193dp-2, 0x1.0e30c36e71a7fp-2, - 0x1.1014319e661bdp-2, 0x1.11f594839a5bdp-2, 0x1.13d4f0862b2e1p-2, - 0x1.15b24a0004a92p-2, 0x1.178da53d1ee01p-2, 0x1.1967067bb94b8p-2, - 0x1.1b3e71ec94f7bp-2, 0x1.1d13ebb32d7f9p-2, 0x1.1ee777e5f0dc3p-2, - 0x1.20b91a8e76105p-2, 0x1.2288d7a9b2b64p-2, 0x1.2456b3282f786p-2, - 0x1.2622b0ee3b79dp-2, 0x1.27ecd4d41eb67p-2, 0x1.29b522a64b609p-2, - 0x1.2b7b9e258e422p-2, 0x1.2d404b073e27ep-2, 0x1.2f032cf56a5bep-2, - 0x1.30c4478f0835fp-2, 0x1.32839e681fc62p-2}; +// Lookup table for log10(r) where r is defined in common_constants.cpp. +static constexpr double LOG10_R[128] = { + 0x0.0000000000000p+0, 0x1.3365b88c0f347p-8, 0x1.0a880e41a67f6p-7, + 0x1.7c441aa5e6d15p-7, 0x1.e088f0b004827p-7, 0x1.29fff7b8ca79dp-6, + 0x1.5ce7223998b98p-6, 0x1.902c31d62a843p-6, 0x1.cb38fccd8bfdbp-6, + 0x1.ff4be13b9292p-6, 0x1.161e4374c37f4p-5, 0x1.30838cdc2fbfdp-5, + 0x1.4b1b588129203p-5, 0x1.65e6692547c4ep-5, 0x1.7d070145f4fd7p-5, + 0x1.944e56a0d345p-5, 0x1.afa88992aea19p-5, 0x1.c74591df72456p-5, + 0x1.df0afe1505738p-5, 0x1.f6f9594de60fp-5, 0x1.078898bc05bf4p-4, + 0x1.13a98bb450f81p-4, 0x1.1fdfcf7839804p-4, 0x1.2c2baf78817b7p-4, + 0x1.388d78b9350ffp-4, 0x1.42efeb4b506e9p-4, 0x1.4f7aad9bbcbafp-4, + 0x1.59ffb662b815cp-4, 0x1.66b4847a68997p-4, 0x1.715d0ce367afcp-4, + 0x1.7c1607b7d7e32p-4, 0x1.86dfa808d36ap-4, 0x1.93e7de0fc3e8p-4, + 0x1.9ed6d60b30612p-4, 0x1.a9d71d5258484p-4, 0x1.b4e8eb0bbf58fp-4, + 0x1.bdd0b9fd09a1p-4, 0x1.c902a19e65111p-4, 0x1.d446afb64c7e5p-4, + 0x1.df9d1f7f5b674p-4, 0x1.e8bc77271b97ap-4, 0x1.f4349618fa91ap-4, + 0x1.ffbfc2bbc7803p-4, 0x1.0484e4942aa43p-3, 0x1.093025a19976cp-3, + 0x1.0f0f17a062353p-3, 0x1.13c8a1e34f7ap-3, 0x1.1888a1c995931p-3, + 0x1.1e81d22b790d4p-3, 0x1.23509c1e6d937p-3, 0x1.2826167a6bc9cp-3, + 0x1.2d0253f67e4cbp-3, 0x1.31e56798a910ap-3, 0x1.36cf64b7a82e6p-3, + 0x1.3bc05efcbb185p-3, 0x1.40b86a657ca3dp-3, 0x1.45b79b45c8551p-3, + 0x1.4abe0649ad606p-3, 0x1.4fcbc0776fd85p-3, 0x1.539ae4e6f9fcep-3, + 0x1.58b59d4bcc3a5p-3, 0x1.5dd7e08de382fp-3, 0x1.61b6939983048p-3, + 0x1.66e6400da3f77p-3, 0x1.6c1db5f9bb336p-3, 0x1.700c78c3e6109p-3, + 0x1.7551c81bd8dcfp-3, 0x1.794b099a25e7cp-3, 0x1.7d48dbc3a9ca1p-3, + 0x1.82a2757995cbep-3, 0x1.86ab17c10bc7fp-3, 0x1.8ab86e6288cd7p-3, + 0x1.9026f112197e8p-3, 0x1.943f6cfd3e96fp-3, 0x1.985cc29a13f12p-3, + 0x1.9c7efd734a2f9p-3, 0x1.a209a84fbcff8p-3, 0x1.a6377d2ce9378p-3, + 0x1.aa6a5eeebefe3p-3, 0x1.aea259d8ffa0bp-3, 0x1.b2df7a5c50299p-3, + 0x1.b721cd17157e3p-3, 0x1.bb695ed655c7dp-3, 0x1.bfb63c969f4ffp-3, + 0x1.c4087384f4f8p-3, 0x1.c86010ffc076cp-3, 0x1.cb482b6cf2083p-3, + 0x1.cfa8e765cbb72p-3, 0x1.d40f2e8a825p-3, 0x1.d87b0ef71db45p-3, + 0x1.dcec96fdc888fp-3, 0x1.dfe61d1174ee7p-3, 0x1.e461322d1648ap-3, + 0x1.e8e216243dd6p-3, 0x1.ebe5ef9d44036p-3, 0x1.f070a36b8d9c1p-3, + 0x1.f5014ef538a5bp-3, 0x1.f80fc47f58028p-3, 0x1.fcaa8567eba7ap-3, + 0x1.ffbfc2bbc7803p-3, 0x1.023262f907322p-2, 0x1.03c074b41e497p-2, + 0x1.06182e84fd4acp-2, 0x1.07a9c2e44ae7ep-2, 0x1.0a06cc96a2056p-2, + 0x1.0b9bf3979e1e1p-2, 0x1.0dfe65799fe64p-2, 0x1.0f972f87ff3d6p-2, + 0x1.113172b49c9fp-2, 0x1.139ba089e300cp-2, 0x1.15399e81ea83dp-2, + 0x1.16d91f45d0888p-2, 0x1.194b3bdef6b9ep-2, 0x1.1aee9017cb45p-2, + 0x1.1c93712abc7ffp-2, 0x1.1e39e209bd0bp-2, 0x1.1fe1e5af2c141p-2, + 0x1.2260e4f424958p-2, 0x1.240ce4c975444p-2, 0x1.25ba8215af7fcp-2, + 0x1.2769bffab2ep-2, 0x1.291aa1a384978p-2, 0x1.2acd2a4473334p-2, + 0x1.2c815d1b3b0a1p-2, 0x1.2e373d6f2b5f2p-2, 0x1.2feece914c3c4p-2, + 0x1.31a813dc85081p-2, 0x1.34413509f79ffp-2}; LLVM_LIBC_FUNCTION(float, log10f, (float x)) { constexpr double LOG10_2 = 0x1.34413509f79ffp-2; @@ -133,17 +131,21 @@ return 9.0f; case 0x5015'02f9U: // x = 10,000,000,000 return 10.0f; + case 0x0efe'ee7aU: // x = 0x1.fddcf4p-98f + return fputil::round_result_slightly_up(-0x1.d33a46p+4f); + case 0x3f80'70d8U: // x = 0x1.00e1bp0f + return fputil::round_result_slightly_up(0x1.8762c4p-10f); +#ifndef LIBC_TARGET_CPU_HAS_FMA + case 0x08ae'a356U: // x = 0x1.5d46acp-110f + return fputil::round_result_slightly_up(-0x1.07d3b4p+5f); + case 0x120b'93dcU: // x = 0x1.1727b8p-91f + return fputil::round_result_slightly_down(-0x1.b5b2aep+4f); + case 0x13ae'78d3U: // x = 0x1.5cf1a6p-88f + return fputil::round_result_slightly_down(-0x1.a5b2aep+4f); case 0x4f13'4f83U: // x = 2471461632.0 return fputil::round_result_slightly_down(0x1.2c9314p+3f); case 0x7956'ba5eU: // x = 69683218960000541503257137270226944.0 return fputil::round_result_slightly_up(0x1.16bebap+5f); -#ifndef LIBC_TARGET_CPU_HAS_FMA - case 0x08ae'a356U: // x = 0x1.5d46acp-110f - return fputil::round_result_slightly_up(-0x1.07d3b4p+5f); - case 0x1c7d'a337U: // x = 0x1.fb466ep-71f - return fputil::round_result_slightly_up(-0x1.5137dp+4f); - case 0x69c8'c583U: // x = 0x1.918b06p+84f - return fputil::round_result_slightly_down(0x1.97b652p+4f); #endif // LIBC_TARGET_CPU_HAS_FMA } @@ -168,25 +170,30 @@ // Normalize denormal inputs. xbits.set_val(xbits.get_val() * 0x1.0p23f); m -= 23; + x_u = xbits.uintval(); } - m += xbits.get_unbiased_exponent(); - int f_index = xbits.get_mantissa() >> 16; + // Add unbiased exponent. + m += static_cast(x_u >> 23); + // Extract 7 leading fractional bits of the mantissa + int index = (x_u >> 16) & 0x7F; // Set bits to 1.m xbits.set_unbiased_exponent(0x7F); - FPBits f = xbits; - f.bits &= ~0x0000'FFFF; - - double d = static_cast(xbits) - static_cast(f); - d *= ONE_OVER_F[f_index]; - - double extra_factor = - fputil::multiply_add(static_cast(m), LOG10_2, LOG10_F[f_index]); - - double r = fputil::polyeval(d, extra_factor, 0x1.bcb7b1526e4c5p-2, - -0x1.bcb7b1518a5e9p-3, 0x1.287a72a6f716p-3, - -0x1.bcadb40b85565p-4, 0x1.5e0bc97f97e22p-4); + double u = static_cast(xbits); + double v = fputil::multiply_add(u, R[index], -1.0); // Exact + + // Degree-5 polynomial approximation of log10 generated by: + // > P = fpminimax(log10(1 + x)/x, 4, [|D...|], [-2^-8, 2^-7]); + constexpr double COEFFS[5] = {0x1.bcb7b1526e2e5p-2, -0x1.bcb7b1528d43dp-3, + 0x1.287a77eb4ca0dp-3, -0x1.bcb8110a181b5p-4, + 0x1.60e7e3e747129p-4}; + double v2 = v * v; // Exact + double p2 = fputil::multiply_add(v, COEFFS[4], COEFFS[3]); + double p1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]); + double p0 = fputil::multiply_add(v, COEFFS[0], LOG10_R[index]); + double r = fputil::multiply_add(static_cast(m), LOG10_2, + fputil::polyeval(v2, p0, p1, p2)); return static_cast(r); } diff --git a/libc/test/src/math/log10f_test.cpp b/libc/test/src/math/log10f_test.cpp --- a/libc/test/src/math/log10f_test.cpp +++ b/libc/test/src/math/log10f_test.cpp @@ -32,7 +32,7 @@ } TEST(LlvmLibcLog10fTest, TrickyInputs) { - constexpr int N = 15; + constexpr int N = 19; constexpr uint32_t INPUTS[N] = { 0x41200000U /*10.0f*/, 0x42c80000U /*100.0f*/, @@ -49,6 +49,10 @@ 0x08ae'a356U /*0x1.5d46acp-110f*/, 0x1c7d'a337U /*0x1.fb466ep-71f*/, 0x69c8'c583U /*0x1.918b06p+84f*/, + 0x0efe'ee7aU /*0x1.fddcf4p-98f*/, + 0x3f80'70d8U /*0x1.00e1bp0f*/, + 0x120b'93dcU /*0x1.1727b8p-91f*/, + 0x13ae'78d3U /*0x1.5cf1a6p-88f*/, }; for (int i = 0; i < N; ++i) {