diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -164,6 +164,7 @@ libc.src.math.ldexp libc.src.math.ldexpf libc.src.math.ldexpl + libc.src.math.log10 libc.src.math.log10f libc.src.math.log1pf libc.src.math.log2f diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -265,6 +265,7 @@ libc.src.math.ldexp libc.src.math.ldexpf libc.src.math.ldexpl + libc.src.math.log10 libc.src.math.log10f libc.src.math.log1pf libc.src.math.log2f diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -261,6 +261,7 @@ libc.src.math.llround libc.src.math.llroundf libc.src.math.llroundl + libc.src.math.log10 libc.src.math.log10f libc.src.math.log1pf libc.src.math.log2f diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -161,6 +161,7 @@ libc.src.math.llround libc.src.math.llroundf libc.src.math.llroundl + libc.src.math.log10 libc.src.math.log10f libc.src.math.log1pf libc.src.math.log2f diff --git a/libc/docs/math.rst b/libc/docs/math.rst --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -12,6 +12,11 @@ .. role:: green +.. toctree:: + :hidden: + + math/log.rst + .. contents:: Table of Contents :depth: 4 @@ -136,7 +141,7 @@ hypot :green:`XA` :green:`XA` lgamma log :green:`XA` -log10 :green:`XA` +log10 :green:`XA` :green:`XA` log1p :green:`XA` log2 :green:`XA` pow @@ -168,7 +173,7 @@ fma :green:`XA` :green:`XA` hypot :green:`XA` :green:`XA` log :green:`XA` -log10 :green:`XA` +log10 :green:`XA` :green:`XA` log1p :green:`XA` log2 :green:`XA` sin :green:`XA` large @@ -257,6 +262,11 @@ | tanhf | 13 | 55 | 57 | 123 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 22.04 LTS x86_64 | Clang 14.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ +Algorithms + Implementation Details +=================================== + +* :doc:`math/log` + References ========== diff --git a/libc/docs/math/log.rst b/libc/docs/math/log.rst new file mode 100644 --- /dev/null +++ b/libc/docs/math/log.rst @@ -0,0 +1,557 @@ +.. _log_algorithm: + +======================== +Log/Log10/Log2 Algorithm +======================== + +.. default-role:: math + +In this short note, we will discuss in detail about the computation of +:math:`\log(x)` function, with double precision inputs, in particular, the range +reduction steps and error analysis. The algorithm is broken down into 2 main +phases as follow: + +1. Fast phase: + + a. Range reduction + b. Polynomial approximation + c. Ziv's test + +2. Accurate phase (if Ziv's test failed): + + a. Further range reduction + b. Polynomial approximation + + +Fast phase +========== + +Range reduction +--------------- + +Let `x = 2^{e_x} (1 + m_x)` be a normalized double precision number, in which +`-1074 \leq e_x \leq 1022` and `0 \leq m_x < 1` such that +`2^{52} m_x \in \mathbb{Z}`. + +Then from the properties of logarithm: + +.. math:: + \log(x) &= \log\left( 2^{e_x} (1 + m_x) \right) \\ + &= \log\left( 2^{e_x} \right) + \log(1 + m_x) \\ + &= e_x \log(2) + \log(1 + m_x) + +the computation of `\log(x)` can be reduced to: + +1. compute the product of `e_x` and `\log(2)`, +2. compute `\log(1 + m_x)` for `0 \leq m_x < 1`, +3. add step 1 and 2. + +To compute `\log(1 + m_x)` in step 2, we can reduce the range further by finding +`r > 0` such that: + +.. math:: + | r(1 + m_x) - 1 | < C \quad \quad \text{(R1)} + +for small `0 < C < 1`. Then if we let `u = r(1 + m_x) - 1`, `|u| < C`: + +.. math:: + \log(1 + m_x) &= \log \left( \frac{r (1 + m_x)}{r} \right) \\ + &= \log(r (1 + m_x) ) - \log(r) \\ + &= \log(1 + u) - \log(r) + +and step 2 can be computed with: + +a. extract `r` and `-\log(r)` from look-up tables, +b. compute the reduced argument `u = r(1 + m_x) - 1`, +c. compute `\log(1 + u)` by polynomial approximation or further range reduction, +d. add step a and step c results. + + +How to derive `r` +----------------- + +For an efficient implementation, we would like to use the first `M` signficicant +bits of `m_x` to look up for `r`. In particular, we would like to find a value +of `r` that works for all `m_x` satisfying: + +.. math:: + k 2^{-M} \leq m_x < (k + 1) 2^{-M} \quad \text{for some} \quad + k = 0..2^{M} - 1. \quad\quad \text{(M1)} + +Let `r = 1 + s`, then `u` can be expressed in terms of `s` as: + +.. math:: + u &= r(1 + m_x) - 1 \\ + &= (1 + s)(1 + m_x) - 1 \\ + &= s m_x + s + m_x &\quad\quad \text{(U1)} \\ + &= s (1 + m_x) + m_x \\ + &= m_x (1 + s) + s. + +From the condition `\text{(R1)}`, `s` is bounded by: + +.. math:: + \frac{-C - m_x}{1 + m_x} < s < \frac{C - m_x}{1 + m_x} \quad\quad \text{(S1)}. + +Since our reduction constant `s` must work for all `m_x` in the interval +`I = \{ v: k 2^{-M} \leq v < (k + 1) 2^{-M} \}`, `s` is bounded by: + +.. math:: + \sup_{v \in I} \frac{-C - v}{1 + v} < s < \inf_{v \in I} \frac{C - v}{1 + v} + +For a fixed constant `|c| < 1`, let `f(v) = \frac{c - v}{1 + v}`, then its +derivative is: + +.. math:: + f'(v) = \frac{(-1)(1 + v) - (1)(c - v)}{(1 + v)^2} = \frac{-1 - c}{(1 + v)^2}. + +Since `|c| < 1`, `f'(v) < 0` for all `v \neq -1`, so: + +.. math:: + \sup_{v \in I} f(v) &= f \left( \inf\{ v: v \in I \} \right) + = f \left( k 2^{-M} \right) \\ + \inf_{v \in I} f(v) &= f \left( \sup\{ v: v \in I \} \right) + = f \left( (k + 1) 2^{-M} \right) + +Hence we have the following bound on `s`: + +.. math:: + \frac{-C - k 2^{-M}}{1 + k 2^{-M}} < s \leq + \frac{C - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}}. \quad\quad \text{(S2)} + +In order for `s` to exist, we need that: + +.. math:: + \frac{C - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}} > + \frac{-C - k 2^{-M}}{1 + k 2^{-M}} + +which is equivalent to: + +.. math:: + \quad\quad 2C - 2^{-M} + (2k + 1) 2^{-M} C > 0 + \iff C > \frac{2^{-M - 1}}{1 + (2k + 1) 2^{-M - 1}} \quad\quad \text{(C1)}. + +Consider the case `C = 2^{-N}`. Since `0 \leq k \leq 2^M - 1,` the right hand +side of `\text{(C1)}` is bounded by: + +.. math:: + 2^{-M - 1} > \frac{2^{-M - 1}}{1 + (2k + 1) 2^{-M - 1}} \geq + \frac{2^{-M - 1}}{1 + (2^{M + 1} - 1) 2^{-M - 1}} > 2^{-M - 2}. + +Hence, from `\text{(C1)}`, being an exact power of 2, `C = 2^{-N}` is bounded below +by: + +.. math:: + C = 2^{-N} \geq 2^{-M - 1}. + +To make the range reduction efficient, we will want to minimize `C` (maximize +`N`) while keeping the required precision of `s`(`r`) as low as possible. And +for that, we will consider the following two cases: `N = M + 1` and `N = M`. + +Case 1 - `N = M + 1` +~~~~~~~~~~~~~~~~~~~~ + +When `N = M + 1`, `\text{(S2)}` becomes: + +.. math:: + \frac{-2^{-M - 1} - k 2^{-M}}{1 + k 2^{-M}} < s < + \frac{2^{-M - 1} - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}}. + \quad\quad \text{(S2')} + +This is an interval of length: + +.. math:: + l &= \frac{2^{-M - 1} - (k + 1) 2^{-M}}{1 + (k + 1) 2^{-M}} - + \frac{-2^{-M - 1} - k 2^{-M}}{1 + k 2^{-M}} \\ + &= \frac{(2k + 1)2^{-2M - 1}}{(1 + k 2^{-M})(1 + (k + 1)2^{-M})} + \quad\quad \text{(L1)} + +As a function of `k`, the length `l` has its derivative with respect to `k`: + +.. math:: + \frac{dl}{dk} = + \frac{2^{2M + 1} - 2k(k + 1) - 1} + {2^{4M}(1 + k 2^{-M})^2 (1 + (k + 1) 2^{-M})^2} + +which is always positive for `0 \leq k \leq 2^M - 1`. So for all +`0 < k < 2^{-M}` (`k = 0` will be treated differently in edge cases), and for +`M > 2`, `l` is bounded below by: + +.. math:: + l > 2^{-2M}. + +It implies that we can always find `s` with `\operatorname{ulp}(s) = 2^{-2M}`. +And from `\text{(U1)}`, `u = s(1 + m_x) + m_x`, its `ulp` is: + +.. math:: + \operatorname{ulp}(u) &= \operatorname{ulp}(s) \cdot \operatorname{ulp}(m_x) \\ + &= 2^{-2M} \operatorname{ulp}(m_x). + +Since: + +.. math:: + |u| < C = 2^{-N} = 2^{-M - 1}, + +Its required precision is: + +.. math:: + \operatorname{prec}(u) &= \log_2(2^{-M-1} / \operatorname{ulp}(u)) \\ + &= \log_2(2^{M - 1} / \operatorname{ulp}(m_x)) \\ + &= M - 1 - \log_2(\operatorname{ulp}(m_x)). + +This means that in this case, we cannot restrict `u` to be exactly representable +in double precision for double precision input `x` with `M > 2`. Nonetheless, +for a reasonable value of `M`, we can have `u` exactly representable in double +precision for single precision input `x` (`\operatorname{ulp}(m_x) = 2^{-23}`) +such that `|u| < 2^{-M - 1}` using a look-up table of size `2^M`. + +A particular formula for `s` can be derived from `\text{(S2')}` by the midpoint +formula: + +.. math:: + s &= 2^{-2M} \operatorname{round}\left( 2^{2M} \cdot \operatorname{midpoint} + \left(-\frac{-2^{-M - 1} - k2^{-M}}{1 + k 2^{-M}}, + \frac{2^{-M-1} - (k + 1)2^{-M}}{1 + (k + 1) 2^{-M}}\right) \right) \\ + &= 2^{-2M} \operatorname{round}\left( 2^{2M} \cdot \frac{1}{2} \left( + \frac{-2^{-M - 1} - k2^{-M}}{1 + k 2^{-M}} + + \frac{2^{-M - 1} + (k + 1)2^{-M}}{1 + (k + 1) 2^{-M}} + \right) \right) \\ + &= 2^{-2M} \operatorname{round}\left( \frac{ + - \left(k + \frac{1}{2} \right) \left(2^M - k - \frac{1}{2} \right) } + {(1 + k 2^{-N})(1 + (k + 1) 2^{-N})} \right) \\ + &= - 2^{-2M} \operatorname{round}\left( \frac{ + \left(k + \frac{1}{2} \right) \left(2^M - k - \frac{1}{2} \right) } + {(1 + k 2^{-N})(1 + (k + 1) 2^{-N})} \right) \quad\quad \text{(S3)} + +The corresponding range and formula for `r = 1 + s` are: + +.. math:: + \frac{1 - 2^{-M - 1}}{1 + k 2^{-M}} < r \leq + \frac{1 + 2^{-M - 1}}{1 + (k + 1) 2^{-M}} + +.. math:: + r &= 2^{-2M} \operatorname{round}\left( 2^{2M} \cdot + \operatorname{midpoint}\left( \frac{1 - 2^{-M - 1}}{1 + k 2^{-M}}, + \frac{1 + 2^{-M - 1}}{1 + (k + 1) 2^{-M}}\right) \right) \\ + &= 2^{-2M} \operatorname{round}\left( 2^{2M} \cdot \frac{1}{2} \left( + \frac{1 + 2^{-M-1}}{1 + (k + 1) 2^{-M}} + \frac{1 - 2^{-M-1}}{1 + k 2^{-M}} + \right) \right) \\ + &= 2^{-2M} \operatorname{round}\left( 2^{2M} \cdot \frac{ + 1 + \left(k + \frac{1}{2} \right) 2^{-M} - 2^{-2M-2} }{(1 + k 2^{-M}) + (1 + (k + 1) 2^{-M})} \right) + +Case 1 - `N = M` +~~~~~~~~~~~~~~~~ + +When `N = M`, `\text{(S2)}` becomes: + +.. math:: + \frac{-(k + 1)2^{-M}}{1 + k 2^{-M}} < s < \frac{-k 2^{-M}}{1 + (k + 1) 2^{-M}} + \quad\quad \text{(S2")} + +This is an interval of length: + +.. math:: + l &= \frac{- k 2^{-M}}{1 + (k + 1) 2^{-M}} - + \frac{- (k + 1) 2^{-M}}{1 + k 2^{-M}} \\ + &= \frac{2^{-M} (1 + (2k + 1) 2^{-M})}{(1 + k 2^{-M})(1 + (k + 1)2^{-M})} + \quad\quad \text{(L1')} + +As a function of `k`, its derivative with respect to `k`: + +.. math:: + \frac{dl}{dk} = + -\frac{2^{-2M}(k(k + 1)2^{-M + 1} + 2^{-M} + 2k + 1)} + {(1 + k 2^{-M})^2 (1 + (k + 1) 2^{-M})^2} + +which is always negative for `0 \leq k \leq 2^M - 1`. So for `M > 1`, `l` is +bounded below by: + +.. math:: + l > \frac{2^{-M - 1} (3 - 2^{-M})}{2 - 2^{-M}} > 2^{-M - 1}. + +It implies that we can always find `s` with `\operatorname{ulp}(s) = 2^{-M-1}`. +And from `\text{(U1)}`, `u = s(1 + m_x) + m_x`, its `ulp` is: + +.. math:: + \operatorname{ulp}(u) &= \operatorname{ulp}(s) \cdot \operatorname{ulp}(m_x) \\ + &= 2^{-M - 1} \operatorname{ulp}(m_x). + +Since: + +.. math:: + |u| < C = 2^{-N} = 2^{-M}, + +Its required precision is: + +.. math:: + \operatorname{prec}(u) &= \log_2(2^{-M} / \operatorname{ulp}(u)) \\ + &= \log_2(2 / \operatorname{ulp}(m_x)) \\ + &= 1 - \log_2(\operatorname{ulp}(m_x)). + +Hence, for double precision `x`, `\operatorname{ulp}(m_x) = 2^{-52}`, and the +precision needed for `u` is `\operatorname{prec}(u) = 53`, i.e., `u` can be +exactly representable in double precision. And in this case, `s` can be +derived from `\text{(S2")}` by the midpoint formula: + +.. math:: + s &= 2^{-M - 1} \operatorname{round}\left( 2^{M + 1} \cdot + \operatorname{midpoint} \left(-\frac{-(k + 1)2^{-M}}{1 + k 2^{-M}}, + \frac{-k2^{-M}}{1 + (k + 1) 2^{-M}}\right) \right) \\ + &= 2^{-M - 1} \operatorname{round}\left( 2^{M + 1} \cdot \frac{1}{2} \left( + \frac{-(k + 1)2^{-M}}{1 + k 2^{-M}} + \frac{-k2^{-M}}{1 + (k + 1) 2^{-M}} + \right) \right) \\ + &= -2^{-M - 1} \operatorname{round}\left( \frac{ + (2k + 1) + (2k^2 + 2k + 1) 2^{-M} } + {(1 + k 2^{-N})(1 + (k + 1) 2^{-N})} \right) \quad\quad \text{(S3')} + +The corresponding range and formula for `r = 1 + s` are: + +.. math:: + \frac{1 - 2^{-M}}{1 + k 2^{-M}} < r \leq \frac{1 + 2^{-M}}{1 + (k + 1) 2^{-M}} + +.. math:: + r &= 2^{-M-1} \operatorname{round}\left( 2^{M + 1} \cdot + \operatorname{midpoint}\left( \frac{1 - 2^{-M}}{1 + k 2^{-M}}, + \frac{1 + 2^{-M}}{1 + (k + 1) 2^{-M}}\right) \right) \\ + &= 2^{-M-1} \operatorname{round}\left( 2^{M + 1} \cdot \frac{1}{2} \left( + \frac{1 + 2^{-M}}{1 + (k + 1) 2^{-M}} + \frac{1 - 2^{-M}}{1 + k 2^{-M}} + \right) \right) \\ + &= 2^{-M - 1} \operatorname{round}\left( 2^{M + 1} \cdot \frac{ + 1 + \left(k + \frac{1}{2} \right) 2^{-M} - 2^{-2M-1} }{(1 + k 2^{-M}) + (1 + (k + 1) 2^{-M})} \right) + +Edge cases +---------- + +1. When `k = 0`, notice that: + +.. math:: + 0 = k 2^{-N} \leq m_x < (k + 1) 2^{-N} = 2^{-N} = C, + +so we can simply choose `r = 1` so that `\log(r) = 0` is exact, then `u = m_x`. +This will help reduce the accumulated errors when `m_x` is close to 0 while +maintaining the range reduction output's requirements. + +2. When `k = 2^{N} - 1`, `\text{(S2)}` becomes: + +.. math:: + -\frac{1}{2} - \frac{C - 2^{-M-1}}{2 - 2^{-M}} <> s \leq + -\frac{1}{2} + \frac{C}{2}. + +so when `C > 2^{-M - 1}` is a power of 2, we can always choose: + +.. math:: + s = -\frac{1}{2}, \quad \text{i.e.} \quad r = \frac{1}{2}. + +This reduction works well to avoid catastropic cancellation happening when +`e_x = -1`. + +This also works when `C = 2^{-M - 1}` if we relax the condition on `u` to +`|u| \leq C = 2^{-M-1}`. + +Intermediate precision, and Ziv's test +-------------------------------------- + +In the fast phase, we want extra precision while performant, so we use +double-double precision for most intermediate computation steps, and employ Ziv +test to see if the result is accurate or not. In our case, the Ziv's test can +be described as follow: + +1. Let `re = re.hi + re.lo` be the double-double output of the fast phase + computation. +2. Let `err` be an estimated upper bound of the errors of `re`. +3. If `\circ(re.hi + (re.lo - err)) == \circ(re.hi + (r.lo + err))` then the + result is correctly rounded to double precision for the current rounding mode + `\circ`. Otherwise, the accurate phase with extra precision is needed. + +For an easy and cheap estimation of the error bound `err`, since the range +reduction step described above is accurate, the errors of the result: + +.. math:: + \log(x) &= e_x \log(2) - \log(r) + \log(1 + u) \\ + &\approx e_x \log(2) - \log(r) + u P(u) + +come from 2 parts: + +1. the look-up part: `e_x \log(2) - \log(r)` +2. the polynomial approximation part: `u P(u)` + +The errors of the first part can be computed with a single `\operatorname{fma}` +operation: + +.. math:: + err_1 = \operatorname{fma}(e_x, err(\log(2)), err(\log(r))), + +and then combining with the errors of the second part for another +`\operatorname{fma}` operation: + +.. math:: + err = \operatorname{fma}(u, err(P), err_1) + + +Accurate phase +============== + +Extending range reduction +------------------------- + +Since the output `u = r(1 + m_x) - 1` of the fast phase's range reduction +is computed exactly, we can apply further range reduction steps by +using the following formula: + +.. math:: + u_{i + 1} = r_i(1 + u_i) - 1 = u_i \cdot r_i + (r_i - 1), + +where `|u_i| < 2^{-N_i}` and `u_0 = u` is representable in double precision. + +Let `s_i = r_i - 1`, then we can rewrite it as: + +.. math:: + u_{i + 1} &= (1 + s_i)(1 + u_i) - 1 \\ + &= s_i u_i + u_i + s_i \\ + &= u_i (1 + s_i) + s_i + &= s_i (1 + u_i) + u_i. + +Then the bound on `u_{i + 1}` is translated to `s_i` as: + +.. math:: + \frac{-2^{-N_{i + 1}} - u_i}{1 + u_i} < s_i < \frac{2^{-N_{i + 1}} - u_i}{1 + u_i}. + +Let say we divide the interval `[0, 2^-{N_i})` into `2^{M_i}` subintervals +evenly and use the index `k` such that: + +.. math:: + k 2^{-N_i - M_i} \leq u_i < (k + 1) 2^{-N_i - M_i}, + +to look-up for the reduction constant `s_{i, k}`. In other word, `k` is given +by the formula: + +.. math:: + k = \left\lfloor 2^{N_i + M_i} u_i \right\rfloor + +Notice that our reduction constant `s_{i, k}` must work for all `u_i` in the +interval `I = \{ v: k 2^{-N_i - M_i} \leq v < (k + 1) 2^{-N_i - M_i} \}`, +so it is bounded by: + +.. math:: + \sup_{v \in I} \frac{-2^{-N_{i + 1}} - v}{1 + v} < s_{i, k} < \inf_{v \in I} \frac{2^{-N_{i + 1}} - v}{1 + v} + +For a fixed constant `|C| < 1`, let `f(v) = \frac{C - v}{1 + v}`, then its derivative +is: + +.. math:: + f'(v) = \frac{(-1)(1 + v) - (1)(C - v)}{(1 + v)^2} = \frac{-1 - C}{(1 + v)^2}. + +Since `|C| < 1`, `f'(v) < 0` for all `v \neq -1`, so: + +.. math:: + \sup_{v \in I} f(v) &= f \left( \inf\{ v: v \in I \} \right) + = f \left( k 2^{-N_i - M_i} \right) \\ + \inf_{v \in I} f(v) &= f \left( \sup\{ v: v \in I \} \right) + = f \left( (k + 1) 2^{-N_i - M_i} \right) + +Hence we have the following bound on `s_{i, k}`: + +.. math:: + \frac{-2^{-N_{i + 1}} - k 2^{-N_i - M_i}}{1 + k 2^{-N_i - M_i}} < s_{i, k} + \leq \frac{2^{-N_{i + 1}} - (k + 1) 2^{-N_i - M_i}}{1 + (k + 1) 2^{-N_i - M_i}} + +This interval is of length: + +.. math:: + l &= \frac{2^{-N_{i + 1}} - (k + 1) 2^{-N_i - M_i}}{1 + (k + 1) 2^{-N_i - M_i}} - + \frac{-2^{-N_{i + 1}} - k 2^{-N_i - M_i}}{1 + k 2^{-N_i - M_i}} \\ + &= \frac{2^{-N_{i + 1} + 1} - 2^{-N_i - M_i} + (2k + 1) 2^{-N_{i + 1} - N_i - M_i}} + {(1 + k 2^{-N_i - M_i})(1 + (k + 1) 2^{-N_i -M_i})} + +So in order to be able to find `s_{i, k}`, we need that: + +.. math:: + 2^{-N_{i + 1} + 1} - 2^{-N_i - M_i} + (2k + 1) 2^{-N_{i + 1} - N_i - M_i} > 0 + +This give us the following bound on `N_{i + 1}`: + +.. math:: + N_{i + 1} \leq N_i + M_i + 1. + +To make the range reduction effective, we will want to maximize `N_{i + 1}`, so +let consider the two cases: `N_{i + 1} = N_i + M_i + 1` and +`N_{i + 1} = N_i + M_i`. + + + +The optimal choice to balance between maximizing `N_{i + 1}` and minimizing the +precision needed for `s_{i, k}` is: + +.. math:: + N_{i + 1} = N_i + M_i, + +and in this case, the optimal `\operatorname{ulp}(s_{i, k})` is: + +.. math:: + \operatorname{ulp}(s_{i, k}) = 2^{-N_i - M_i} + +and the corresponding `\operatorname{ulp}(u_{i + 1})` is: + +.. math:: + \operatorname{ulp}(u_{i + 1}) &= \operatorname{ulp}(u_i) \operatorname{ulp}(s_{i, k}) \\ + &= \operatorname{ulp}(u_i) \cdot 2^{-N_i - M_i} \\ + &= \operatorname{ulp}(u_0) \cdot 2^{-N_0 - M_0} \cdot 2^{-N_0 - M_0 - M_1} \cdots 2^{-N_0 - M_0 - M_1 - \cdots - M_i} \\ + &= 2^{-N_0 - 53} \cdot 2^{-N_0 - M_0} \cdot 2^{-N_0 - M_0 - M_1} \cdots 2^{-N_0 - M_0 - M_1 - \cdots - M_i} + +Since `|u_{i + 1}| < 2^{-N_{i + 1}} = 2^{-N_0 - M_1 - ... -M_i}`, the precision +of `u_{i + 1}` is: + +.. math:: + \operatorname{prec}(u_{i + 1}) &= (N_0 + 53) + (N_0 + M_0) + \cdots + + (N_0 + M_0 + \cdots + M_i) - (N_0 + M_0 + \cdots + M_i) \\ + &= (i + 1) N_0 + i M_0 + (i - 1) M_1 + \cdots + M_{i - 1} + 53 + +If we choose to have the same `M_0 = M_1 = \cdots = M_i = M`, this can be +simplified to: + +.. math:: + \operatorname{prec}(u_{i + 1}) = (i + 1) N_0 + \frac{i(i + 1)}{2} \cdot M + 53. + +We summarize the precision analysis for extending the range reduction in the +table below: + ++-------+-----+-----------+------------+--------------+-----------------+-------------------+ +| `N_0` | `M` | No. steps | Table size | Output bound | ulp(`s_{i, k}`) | prec(`u_{i + 1}`) | ++-------+-----+-----------+------------+--------------+-----------------+-------------------+ +| 7 | 4 | 1 | 32 | `2^{-11}` | `2^{-12}` | 60 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 2 | 64 | `2^{-15}` | `2^{-16}` | 71 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 3 | 96 | `2^{-19}` | `2^{-20}` | 86 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 4 | 128 | `2^{-23}` | `2^{-24}` | 105 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 5 | 160 | `2^{-27}` | `2^{-28}` | 128 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 6 | 192 | `2^{-31}` | `2^{-32}` | 155 | +| +-----+-----------+------------+--------------+-----------------+-------------------+ +| | 5 | 3 | 192 | `2^{-22}` | `2^{-23}` | 89 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 4 | 256 | `2^{-27}` | `2^{-28}` | 111 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 5 | 320 | `2^{-32}` | `2^{-33}` | 138 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 6 | 384 | `2^{-37}` | `2^{-38}` | 170 | +| +-----+-----------+------------+--------------+-----------------+-------------------+ +| | 6 | 3 | 384 | `2^{-25}` | `2^{-26}` | 92 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 4 | 512 | `2^{-31}` | `2^{-32}` | 117 | +| +-----+-----------+------------+--------------+-----------------+-------------------+ +| | 7 | 1 | 256 | `2^{-24}` | `2^{-15}` | 60 | +| | +-----------+------------+--------------+-----------------+-------------------+ +| | | 2 | 512 | `2^{-21}` | `2^{-22}` | 74 | ++-------+-----+-----------+------------+--------------+-----------------+-------------------+ + +where: + +- Number of steps = `i + 1` +- Table size = `(i + 1) 2^{M + 1}` +- Output bound = `2^{-N_{i + 1}} = 2^{-N_0 - (i + 1) M}` +- `\operatorname{ulp}(s_{i, k}) = 2^{-N_{i + 1} - 1}` +- `\operatorname{prec}(u_{i + 1}) = (i + 1) N_0 + \frac{i(i + 1)}{2} \cdot M + 53` diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -405,6 +405,7 @@ FunctionSpec<"ldexpf", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"ldexpl", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"log10", RetValSpec, [ArgSpec]>, FunctionSpec<"log10f", RetValSpec, [ArgSpec]>, FunctionSpec<"log1pf", RetValSpec, [ArgSpec]>, diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -178,6 +178,16 @@ ROUND_OPT ) +add_header_library( + double_double + HDRS + double_double.h + DEPENDS + libc.src.__support.common + libc.src.__support.number_pair + .multiply_add +) + add_header_library( dyadic_float HDRS diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h new file mode 100644 --- /dev/null +++ b/libc/src/__support/FPUtil/double_double.h @@ -0,0 +1,52 @@ +//===-- Utilities for double-double data type. ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_DOUBLEDOUBLE_H +#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_DOUBLEDOUBLE_H + +#include "multiply_add.h" +#include "src/__support/number_pair.h" + +namespace __llvm_libc::fputil { + +using DoubleDouble = __llvm_libc::NumberPair; + +// Assumption: |a| >= |b| +constexpr inline DoubleDouble exact_add(double a, double b) { + DoubleDouble r{0.0, 0.0}; + r.hi = a + b; + double t = r.hi - a; + r.lo = b - t; + return r; +} + +// Assumption: |a.hi| >= |b.hi| +constexpr inline DoubleDouble add(DoubleDouble a, DoubleDouble b) { + DoubleDouble r = exact_add(a.hi, b.hi); + double lo = a.lo + b.lo; + return exact_add(r.hi, r.lo + lo); +} + +// Assumption: |a.hi| >= |b| +constexpr inline DoubleDouble add(DoubleDouble a, double b) { + DoubleDouble r = exact_add(a.hi, b); + return exact_add(r.hi, r.lo + a.lo); +} + +// TODO(lntue): add a correct multiplication when FMA instructions are not +// available. +constexpr inline DoubleDouble exact_mult(double a, double b) { + DoubleDouble r{0.0, 0.0}; + r.hi = a * b; + r.lo = fputil::multiply_add(a, b, -r.hi); + return r; +} + +} // namespace __llvm_libc::fputil + +#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_DOUBLEDOUBLE_H diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -54,7 +54,7 @@ DyadicFloat(bool s, int e, MantissaType m) : sign(s), exponent(e), mantissa(m) { normalize(); - }; + } // Normalizing the mantissa, bringing the leading 1 bit to the most // significant bit. diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h --- a/libc/src/__support/number_pair.h +++ b/libc/src/__support/number_pair.h @@ -18,8 +18,6 @@ DEFINE_NAMED_PAIR_TEMPLATE(NumberPair, lo, hi); -using DoubleDouble = NumberPair; - template cpp::enable_if_t && cpp::is_unsigned_v, NumberPair> split(T a) { diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -131,6 +131,7 @@ add_math_entrypoint_object(ldexpf) add_math_entrypoint_object(ldexpl) +add_math_entrypoint_object(log10) add_math_entrypoint_object(log10f) add_math_entrypoint_object(log1pf) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -756,6 +756,22 @@ common_constants.cpp ) +# TODO(lntue): Make log10 correctly rounded for non-FMA targets. +add_entrypoint_object( + log10 + SRCS + log10.cpp + HDRS + ../log10.h + DEPENDS + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( log10f SRCS diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp new file mode 100644 --- /dev/null +++ b/libc/src/math/generic/log10.cpp @@ -0,0 +1,1084 @@ +//===-- Double-precision log10(x) function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/log10.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/common.h" + +namespace __llvm_libc { + +// 192-bit precision dyadic floating point numbers. +using Float192 = typename fputil::DyadicFloat<192>; +using MType = typename Float192::MantissaType; + +namespace { + +// log10(2) generated by Sollya with: +// > a = round(log10(2), 43, RN); +constexpr double LOG10_2_HI = 0x1.34413509f78p-2; // LSB = 2^-43 +// > b = round(log10(2) - a, D, RN); +constexpr double LOG10_2_LO = 0x1.fef311f12b358p-46; // LSB = 2^-98 +constexpr double LOG10_2_ULP[2] = {0x1.0p-97, 0.0}; + +// Generated by Sollya with: +// > for i from 0 to 127 do { +// r = 2^-8 * nearestint( 2^8 * (1 + (i + 0.5)*2^(-7) - 2^(-15)) / +// ((1 + i*2^-7)*(1 + (i + 1)*2^-7)) ); +// print(r, ","); +// }; +// To improve the accuracy with inputs close to 1, we replace R[0] with 1.0. +constexpr double R[128] = { + 1.0, 0x1.fap-1, 0x1.f6p-1, 0x1.f2p-1, 0x1.eep-1, 0x1.eap-1, 0x1.e8p-1, + 0x1.e4p-1, 0x1.ep-1, 0x1.dcp-1, 0x1.dap-1, 0x1.d6p-1, 0x1.d2p-1, 0x1.dp-1, + 0x1.ccp-1, 0x1.c8p-1, 0x1.c6p-1, 0x1.c2p-1, 0x1.cp-1, 0x1.bcp-1, 0x1.bap-1, + 0x1.b6p-1, 0x1.b4p-1, 0x1.bp-1, 0x1.aep-1, 0x1.aap-1, 0x1.a8p-1, 0x1.a6p-1, + 0x1.a2p-1, 0x1.ap-1, 0x1.9ep-1, 0x1.9ap-1, 0x1.98p-1, 0x1.96p-1, 0x1.94p-1, + 0x1.9p-1, 0x1.8ep-1, 0x1.8cp-1, 0x1.8ap-1, 0x1.88p-1, 0x1.84p-1, 0x1.82p-1, + 0x1.8p-1, 0x1.7ep-1, 0x1.7cp-1, 0x1.7ap-1, 0x1.78p-1, 0x1.76p-1, 0x1.74p-1, + 0x1.72p-1, 0x1.7p-1, 0x1.6ep-1, 0x1.6cp-1, 0x1.6ap-1, 0x1.68p-1, 0x1.66p-1, + 0x1.64p-1, 0x1.62p-1, 0x1.6p-1, 0x1.5ep-1, 0x1.5cp-1, 0x1.5ap-1, 0x1.58p-1, + 0x1.56p-1, 0x1.54p-1, 0x1.52p-1, 0x1.5p-1, 0x1.5p-1, 0x1.4ep-1, 0x1.4cp-1, + 0x1.4ap-1, 0x1.48p-1, 0x1.46p-1, 0x1.46p-1, 0x1.44p-1, 0x1.42p-1, 0x1.4p-1, + 0x1.3ep-1, 0x1.3ep-1, 0x1.3cp-1, 0x1.3ap-1, 0x1.38p-1, 0x1.38p-1, 0x1.36p-1, + 0x1.34p-1, 0x1.32p-1, 0x1.32p-1, 0x1.3p-1, 0x1.2ep-1, 0x1.2ep-1, 0x1.2cp-1, + 0x1.2ap-1, 0x1.2ap-1, 0x1.28p-1, 0x1.26p-1, 0x1.26p-1, 0x1.24p-1, 0x1.22p-1, + 0x1.22p-1, 0x1.2p-1, 0x1.1ep-1, 0x1.1ep-1, 0x1.1cp-1, 0x1.1cp-1, 0x1.1ap-1, + 0x1.18p-1, 0x1.18p-1, 0x1.16p-1, 0x1.16p-1, 0x1.14p-1, 0x1.12p-1, 0x1.12p-1, + 0x1.1p-1, 0x1.1p-1, 0x1.0ep-1, 0x1.0ep-1, 0x1.0cp-1, 0x1.0ap-1, 0x1.0ap-1, + 0x1.08p-1, 0x1.08p-1, 0x1.06p-1, 0x1.06p-1, 0x1.04p-1, 0x1.04p-1, 0x1.02p-1, + 0x1.02p-1, 0x1p-1, +}; + +// Generated by Sollya with: +// for i from 0 to 127 do { +// r = 2^-8 * nearestint( 2^8 * (1 + (i + 0.5)*2^(-7) - 2^(-15)) / +// ((1 + i*2^-7)*(1 + (i + 1)*2^-7)) ); +// b = nearestint(log10(r)*2^43) * 2^-43; +// c = round(log10(r) - b, D, RN); +// print("{", -c, ",", -b, "},"); +// }; +// We replace LOG10_R[0] with log10(1.0) == 0.0 +constexpr fputil::DoubleDouble LOG10_R[128] = { + {0.0, 0.0}, + {-0x1.dfa6d47e47379p-45, 0x1.4f8205236p-8}, + {-0x1.11bc6f2b9a3acp-45, 0x1.18b2dc8d3p-7}, + {-0x1.685fc114e61bfp-46, 0x1.8a8c06bb2p-7}, + {0x1.3c757d5b7376ap-45, 0x1.fd503c39p-7}, + {0x1.f3cbd9c5111b1p-47, 0x1.3881a7b818p-6}, + {-0x1.e22fab794a816p-45, 0x1.559bd2407p-6}, + {0x1.421bab5f034a4p-45, 0x1.902c31d628p-6}, + {0x1.fedb4b594a31bp-45, 0x1.cb38fccd88p-6}, + {-0x1.c4e824b246c7fp-47, 0x1.0362241e64p-5}, + {-0x1.df23ed2ebe477p-45, 0x1.125d0432ecp-5}, + {-0x1.00c12f7a1b586p-47, 0x1.30838cdc3p-5}, + {0x1.e5ff3439d368dp-46, 0x1.4eec0e2458p-5}, + {0x1.2951bb9cd2fb7p-45, 0x1.5e3966b7e8p-5}, + {0x1.fae96a708581ep-46, 0x1.7d070145f4p-5}, + {0x1.badcf3d6e4566p-46, 0x1.9c197abfp-5}, + {-0x1.b0197d2cb982ep-48, 0x1.abbcebd85p-5}, + {-0x1.24b4a6b5ce4d4p-52, 0x1.cb38fccd8cp-5}, + {-0x1.40bcd23c3e44cp-45, 0x1.db11ed766cp-5}, + {-0x1.024e9d08ce301p-45, 0x1.fafa6d398p-5}, + {0x1.77c3e779b9fcfp-48, 0x1.0585283764p-4}, + {0x1.a9a57734f2038p-48, 0x1.15b11a094ap-4}, + {-0x1.d227d61f9e88dp-45, 0x1.1dd5460c8cp-4}, + {0x1.d288560689912p-53, 0x1.2e3a740b78p-4}, + {-0x1.df5de49ddb16p-46, 0x1.367ba3aaa2p-4}, + {0x1.5b873a39e56dcp-47, 0x1.471ba8a7dep-4}, + {0x1.75da8a5871b9ap-45, 0x1.4f7aad9bbcp-4}, + {0x1.ef5f3c10990f4p-45, 0x1.57e3d47c3ap-4}, + {-0x1.02359c584ac3cp-45, 0x1.68d4eaf26ep-4}, + {-0x1.41149840eaa65p-46, 0x1.715d0ce368p-4}, + {-0x1.ff281b9601ce6p-46, 0x1.79efb57b1p-4}, + {0x1.2b9da13d5c8cbp-47, 0x1.8b350364c6p-4}, + {-0x1.80743406505e6p-48, 0x1.93e7de0fc4p-4}, + {-0x1.76b169f6b4949p-49, 0x1.9ca5aa172ap-4}, + {-0x1.bc8889d0fe1ap-47, 0x1.a56e8325f6p-4}, + {-0x1.03ad4133e8c4cp-45, 0x1.b721cd1716p-4}, + {0x1.72a4e1d198491p-46, 0x1.c00c776722p-4}, + {-0x1.ddd18dedb6656p-45, 0x1.c902a19e66p-4}, + {0x1.5e533080ecf32p-47, 0x1.d204698cb4p-4}, + {0x1.7e865b8783768p-45, 0x1.db11ed766ap-4}, + {0x1.5f7ef576ada0cp-45, 0x1.ed50a4a26ep-4}, + {0x1.c9a3bd0891bccp-46, 0x1.f68216c9ccp-4}, + {-0x1.ff229f20ed3d2p-46, 0x1.ffbfc2bbc8p-4}, + {-0x1.6f30673aae7efp-45, 0x1.0484e4942bp-3}, + {0x1.dae5ed5e3f34cp-45, 0x1.093025a199p-3}, + {-0x1.3eea49e637bb3p-45, 0x1.0de1b56357p-3}, + {0x1.82c6326f70b35p-46, 0x1.1299a4fb3ep-3}, + {0x1.f04d633b79054p-45, 0x1.175805d158p-3}, + {0x1.8b891b6d05a73p-48, 0x1.1c1ce9955cp-3}, + {-0x1.35ca658049a0ap-51, 0x1.20e8624039p-3}, + {0x1.ff081a4e81f0bp-45, 0x1.25ba8215afp-3}, + {0x1.1e3f04f63ee01p-45, 0x1.2a935ba5f1p-3}, + {-0x1.e1471e5cb397ep-45, 0x1.2f7301cf4fp-3}, + {-0x1.5bd54fd6eb7d9p-45, 0x1.345987bfefp-3}, + {0x1.fe93f791a7264p-46, 0x1.394700f795p-3}, + {-0x1.8aebce3ec8738p-45, 0x1.3e3b814974p-3}, + {0x1.b0722aa2559f2p-45, 0x1.43371cde07p-3}, + {-0x1.bcb784188a058p-46, 0x1.4839e83507p-3}, + {0x1.20ca9a2bcc728p-45, 0x1.4d43f8275ap-3}, + {0x1.bb95ec8fd8f68p-45, 0x1.525561e925p-3}, + {0x1.4e036062e2e73p-48, 0x1.576e3b0bdep-3}, + {-0x1.e560b5e7a02b4p-51, 0x1.5c8e998073p-3}, + {0x1.1e472c751a4cp-49, 0x1.61b6939983p-3}, + {-0x1.1116ad28239bp-48, 0x1.66e6400da4p-3}, + {0x1.9ad1d9e405fb9p-46, 0x1.6c1db5f9bbp-3}, + {-0x1.41149840eaa65p-45, 0x1.715d0ce368p-3}, + {0x1.bfb1de334b1cbp-45, 0x1.76a45cbb7ep-3}, + {0x1.bfb1de334b1cbp-45, 0x1.76a45cbb7ep-3}, + {-0x1.9fc01708d86e7p-48, 0x1.7bf3bde09ap-3}, + {0x1.4adaf7fe992b6p-45, 0x1.814b4921bdp-3}, + {-0x1.c0b434f5d2b7ap-46, 0x1.86ab17c10cp-3}, + {0x1.4c7acd659652fp-45, 0x1.8c13437695p-3}, + {0x1.3e99da1b485cfp-45, 0x1.9183e67339p-3}, + {0x1.3e99da1b485cfp-45, 0x1.9183e67339p-3}, + {-0x1.fb7d8e74e02ap-46, 0x1.96fd1b63ap-3}, + {0x1.7c9690248757ep-46, 0x1.9c7efd734ap-3}, + {-0x1.0cee0ed4ca7e9p-52, 0x1.a209a84fbdp-3}, + {0x1.d924eb1fec6c5p-47, 0x1.a79d382bc2p-3}, + {0x1.d924eb1fec6c5p-47, 0x1.a79d382bc2p-3}, + {0x1.fe6eb5a4df1dfp-49, 0x1.ad39c9c2c6p-3}, + {0x1.4c9cce6dd603bp-46, 0x1.b2df7a5c5p-3}, + {-0x1.a01b9bb0ebf1bp-45, 0x1.b88e67cf98p-3}, + {-0x1.a01b9bb0ebf1bp-45, 0x1.b88e67cf98p-3}, + {0x1.2ee896e06dbe8p-45, 0x1.be46b08735p-3}, + {-0x1.ff2381071d23ep-49, 0x1.c4087384f5p-3}, + {-0x1.2f9fd61140aa6p-45, 0x1.c9d3d065c6p-3}, + {-0x1.2f9fd61140aa6p-45, 0x1.c9d3d065c6p-3}, + {-0x1.2386ad8b819b8p-45, 0x1.cfa8e765ccp-3}, + {0x1.d63da3ac9f0d5p-45, 0x1.d587d96494p-3}, + {0x1.d63da3ac9f0d5p-45, 0x1.d587d96494p-3}, + {0x1.fcc16f3ba09cbp-45, 0x1.db70c7e96ep-3}, + {-0x1.cc52c1ba2d838p-45, 0x1.e163d527e7p-3}, + {-0x1.cc52c1ba2d838p-45, 0x1.e163d527e7p-3}, + {0x1.f97877007c127p-46, 0x1.e76124046bp-3}, + {0x1.fbd42fdc335fdp-47, 0x1.ed68d81919p-3}, + {0x1.fbd42fdc335fdp-47, 0x1.ed68d81919p-3}, + {-0x1.cbc0789055864p-45, 0x1.f37b15bab1p-3}, + {0x1.2737df7f29668p-45, 0x1.f99801fdb7p-3}, + {0x1.2737df7f29668p-45, 0x1.f99801fdb7p-3}, + {-0x1.ff229f20ed3d2p-45, 0x1.ffbfc2bbc8p-3}, + {0x1.00809c16ae3ecp-46, 0x1.02f93f4c87p-2}, + {0x1.00809c16ae3ecp-46, 0x1.02f93f4c87p-2}, + {-0x1.aa1358e87a6b4p-45, 0x1.06182e84fd8p-2}, + {-0x1.aa1358e87a6b4p-45, 0x1.06182e84fd8p-2}, + {-0x1.f171b2c5f2274p-48, 0x1.093cc32c91p-2}, + {-0x1.42d6ae59e7d9cp-45, 0x1.0c6711d6acp-2}, + {-0x1.42d6ae59e7d9cp-45, 0x1.0c6711d6acp-2}, + {0x1.eac1871dbdbbfp-45, 0x1.0f972f87ffp-2}, + {0x1.eac1871dbdbbfp-45, 0x1.0f972f87ffp-2}, + {0x1.feed957c16a44p-46, 0x1.12cd31b9c98p-2}, + {0x1.a6023a51d15b6p-46, 0x1.16092e5d3a8p-2}, + {0x1.a6023a51d15b6p-46, 0x1.16092e5d3a8p-2}, + {0x1.cefc5208422d8p-45, 0x1.194b3bdef68p-2}, + {0x1.cefc5208422d8p-45, 0x1.194b3bdef68p-2}, + {-0x1.1d4f1e8c2daffp-55, 0x1.1c93712abc8p-2}, + {-0x1.1d4f1e8c2daffp-55, 0x1.1c93712abc8p-2}, + {0x1.40eb9b53054c3p-46, 0x1.1fe1e5af2cp-2}, + {0x1.9bbc8038401fcp-45, 0x1.2336b161b3p-2}, + {0x1.9bbc8038401fcp-45, 0x1.2336b161b3p-2}, + {0x1.09ca54daae9f9p-48, 0x1.2691ecc29fp-2}, + {0x1.09ca54daae9f9p-48, 0x1.2691ecc29fp-2}, + {0x1.2b528446968a4p-48, 0x1.29f3b0e1558p-2}, + {0x1.2b528446968a4p-48, 0x1.29f3b0e1558p-2}, + {-0x1.4548507c3dd04p-46, 0x1.2d5c1760b88p-2}, + {-0x1.4548507c3dd04p-46, 0x1.2d5c1760b88p-2}, + {-0x1.db59b99249f3ap-46, 0x1.30cb3a7bb38p-2}, + {-0x1.db59b99249f3ap-46, 0x1.30cb3a7bb38p-2}, + {0x1.fef311f12b358p-46, 0x1.34413509f78p-2}, +}; + +constexpr double LOG10_R_ULP[2] = {0x1.0p-96, 0.0}; + +// Generated with Sollya: +// > P = fpminimax(log10(1 + x)/x, 6, [|D...|], [-2^-7; 2^-7], absolute); +// > dirtyinfnorm(log10(1 + x)/x - P, [-2^-7, 2^-7]); +// 0x1.9535684fb3064623001de9b13e6adf06355b5d75bp-57 +constexpr double COEFFS[7] = {0x1.bcb7b1526e50ep-2, -0x1.bcb7b1526e53fp-3, + 0x1.287a763700e4p-3, -0x1.bcb7b14641063p-4, + 0x1.63c61abdf033fp-4, -0x1.28808b8a217fcp-4, + 0x1.ffe99fc1908c6p-5}; + +constexpr double P_ERR = 0x1.0p-52; + +// Number of extra range reduction steps. +constexpr size_t R_STEPS = 5; +constexpr size_t R_BITS = 4; +constexpr size_t R_SIZES = 1 << (R_BITS + 1); + +// Generated by Sollya with: +// for i from 0 to 4 do { +// N = 11 + 4*i; +// print ("{"); +// for j from -2^4 to 2^4 - 1 do { +// r = 2^(-N) * nearestint(2^(N) * ( 1 + (j + 0.5)*2^(-N) - 2^(-2*N-1)) / +// ((1 + j * 2^(-N)) * (1 + (j + 1)*2^(-N)))); +// print(r, ","); +// }; +// print("},"); +// }; +constexpr double RR[R_STEPS][R_SIZES] = { + { + 0x1.02p0, 0x1.01ep0, 0x1.01cp0, 0x1.01ap0, 0x1.018p0, 0x1.016p0, + 0x1.014p0, 0x1.012p0, 0x1.01p0, 0x1.00ep0, 0x1.00cp0, 0x1.00ap0, + 0x1.008p0, 0x1.006p0, 0x1.004p0, 0x1p0, 0x1p0, 0x1.ffcp-1, + 0x1.ff8p-1, 0x1.ff4p-1, 0x1.ffp-1, 0x1.fecp-1, 0x1.fe8p-1, 0x1.fe4p-1, + 0x1.fep-1, 0x1.fdcp-1, 0x1.fd8p-1, 0x1.fd4p-1, 0x1.fdp-1, 0x1.fccp-1, + 0x1.fc8p-1, 0x1.fc4p-1, + }, + { + 0x1.002p0, 0x1.001ep0, 0x1.001cp0, 0x1.001ap0, 0x1.0018p0, + 0x1.0016p0, 0x1.0014p0, 0x1.0012p0, 0x1.001p0, 0x1.000ep0, + 0x1.000cp0, 0x1.000ap0, 0x1.0008p0, 0x1.0006p0, 0x1.0004p0, + 0x1p0, 0x1p0, 0x1.fffcp-1, 0x1.fff8p-1, 0x1.fff4p-1, + 0x1.fffp-1, 0x1.ffecp-1, 0x1.ffe8p-1, 0x1.ffe4p-1, 0x1.ffep-1, + 0x1.ffdcp-1, 0x1.ffd8p-1, 0x1.ffd4p-1, 0x1.ffdp-1, 0x1.ffccp-1, + 0x1.ffc8p-1, 0x1.ffc4p-1, + }, + { + 0x1.0002p0, 0x1.0001ep0, 0x1.0001cp0, 0x1.0001ap0, 0x1.00018p0, + 0x1.00016p0, 0x1.00014p0, 0x1.00012p0, 0x1.0001p0, 0x1.0000ep0, + 0x1.0000cp0, 0x1.0000ap0, 0x1.00008p0, 0x1.00006p0, 0x1.00004p0, + 0x1p0, 0x1p0, 0x1.ffffcp-1, 0x1.ffff8p-1, 0x1.ffff4p-1, + 0x1.ffffp-1, 0x1.fffecp-1, 0x1.fffe8p-1, 0x1.fffe4p-1, 0x1.fffep-1, + 0x1.fffdcp-1, 0x1.fffd8p-1, 0x1.fffd4p-1, 0x1.fffdp-1, 0x1.fffccp-1, + 0x1.fffc8p-1, 0x1.fffc4p-1, + }, + { + 0x1.00002p0, 0x1.00001ep0, 0x1.00001cp0, 0x1.00001ap0, + 0x1.000018p0, 0x1.000016p0, 0x1.000014p0, 0x1.000012p0, + 0x1.00001p0, 0x1.00000ep0, 0x1.00000cp0, 0x1.00000ap0, + 0x1.000008p0, 0x1.000006p0, 0x1.000004p0, 0x1p0, + 0x1p0, 0x1.fffffcp-1, 0x1.fffff8p-1, 0x1.fffff4p-1, + 0x1.fffffp-1, 0x1.ffffecp-1, 0x1.ffffe8p-1, 0x1.ffffe4p-1, + 0x1.ffffep-1, 0x1.ffffdcp-1, 0x1.ffffd8p-1, 0x1.ffffd4p-1, + 0x1.ffffdp-1, 0x1.ffffccp-1, 0x1.ffffc8p-1, 0x1.ffffc4p-1, + }, + { + 0x1.000002p0, 0x1.000001ep0, 0x1.000001cp0, 0x1.000001ap0, + 0x1.0000018p0, 0x1.0000016p0, 0x1.0000014p0, 0x1.0000012p0, + 0x1.000001p0, 0x1.000000ep0, 0x1.000000cp0, 0x1.000000ap0, + 0x1.0000008p0, 0x1.0000006p0, 0x1.0000004p0, 0x1p0, + 0x1p0, 0x1.ffffffcp-1, 0x1.ffffff8p-1, 0x1.ffffff4p-1, + 0x1.ffffffp-1, 0x1.fffffecp-1, 0x1.fffffe8p-1, 0x1.fffffe4p-1, + 0x1.fffffep-1, 0x1.fffffdcp-1, 0x1.fffffd8p-1, 0x1.fffffd4p-1, + 0x1.fffffdp-1, 0x1.fffffccp-1, 0x1.fffffc8p-1, 0x1.fffffc4p-1, + }, +}; + +// log10(2) with 192-bit prepcision generated by SageMath with: +// sage: (s, m, e) = RealField(192)(2).log10().sign_exponent_mantissa(); +// sage: print("MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), ",", +// hex((m >> 128) % 2^64), "})"); +const Float192 LOG10_2(/*sign=*/false, /*exponent=*/-193, /*mantissa=*/ + MType({0x26ad30c543d1f34a, 0x8f8959ac0b7c9178, + 0x9a209a84fbcff798})); + +// -log10(r) with 192-bit precision generated by SageMath with: +// +// for i in range(128): +// r = 2^-8 * round( 2^8 * (1 + (i + 1/2)*2^(-7) - 2^(-15)) / ((1 + i*2^-7)*(1 +// + (i + 1)*2^-7)) ); s, m, e = RR(r).log10().sign_mantissa_exponent(); +// print("{false,", e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), +// ",", hex((m >> 128) % 2^64), "})},"); +const Float192 LOG10_R_F192[128] = { + {false, 0, MType(0)}, + {false, -199, + MType({0x3b7bb8a51a78c8bd, 0x6e321c6a7cdecc4, 0xa7c10291a88164ae})}, + {false, -198, + MType({0xf4ba30d77042aac0, 0xa8cb8a86f6040a23, 0x8c596e4695dc8721})}, + {false, -198, + MType({0x49fd078256b3ba8b, 0xeb19e4156cfa1bb7, 0xc546035d8e97a03e})}, + {false, -198, + MType({0x3d964e8c5b12cf00, 0xb6e6ed36c9800088, 0xfea81e1c8278eafa})}, + {false, -197, + MType({0x9c97a4a794669437, 0x714446c4f6a91d15, 0x9c40d3dc0c7cf2f6})}, + {false, -197, + MType({0x1fc57458d58421ab, 0x86b57ea610c7db33, 0xaacde920361dd054})}, + {false, -197, + MType({0x89be5385cc62fb89, 0x5f034a40e6a2f09c, 0xc81618eb15421bab})}, + {false, -197, + MType({0x8b3be744561e443e, 0x594a31b2c5cc891b, 0xe59c7e66c5fedb4b})}, + {false, -196, + MType({0xb1afe1d0add7d035, 0x69b7270237094316, 0x81b1120f31c762fb})}, + {false, -196, + MType({0x6821aa1f743fb5b9, 0x68a0dc47567691c9, 0x892e821975106e09})}, + {false, -196, + MType({0x1935f7436a3ea32c, 0x10bc94f44d216b49, 0x9841c66e17dfe7da})}, + {false, -196, + MType({0x3dfba019afa885f9, 0xe74da327d8d80cb, 0xa77607122c797fcd})}, + {false, -196, + MType({0x7d002c9ce2b6dff3, 0xce697dbaa00d4c7d, 0xaf1cb35bf494a8dd})}, + {false, -196, + MType({0x9ff137c69b2cf640, 0x9c216079dcf0ea95, 0xbe8380a2fa7eba5a})}, + {false, -196, + MType({0x94d5d9c17fa207c5, 0xf5b91598205b8144, 0xce0cbd5f806eb73c})}, + {false, -196, + MType({0x8f995b90cbedc22d, 0x2d3467d253e2d1fb, 0xd5de75ec27e4fe68})}, + {false, -196, + MType({0x8b3be744561e443e, 0x594a31b2c5cc891b, 0xe59c7e66c5fedb4b})}, + {false, -196, + MType({0x97079feab7423d9b, 0xe1e0dda0b3d375a3, 0xed88f6bb355fa196})}, + {false, -196, + MType({0xe86f834386695343, 0x7b98e7f593daf19e, 0xfd7d369cbf7ed8b1})}, + {false, -195, + MType({0x5d4cae75ae8c48e7, 0x3bcdcfe7b23976cd, 0x82c2941bb20bbe1f})}, + {false, -195, + MType({0x67047204db2989f7, 0xb9a7901be7521304, 0x8ad88d04a50d4d2b})}, + {false, -195, + MType({0x829acbf3088a2849, 0x78185dcc37fda019, 0x8eeaa306458b760a})}, + {false, -195, + MType({0x9d5d4ea0a7c8afb1, 0x1581a26448e2ac0e, 0x971d3a05bc0074a2})}, + {false, -195, + MType({0xa078ffeb008a04d8, 0x6c449d409f883fe2, 0x9b3dd1d550c41443})}, + {false, -195, + MType({0x2ede0aa760148d65, 0xa39e56dbb661c829, 0xa38dd453ef15b873})}, + {false, -195, + MType({0x1716e6f9b3225217, 0x961c6e690d8879b4, 0xa7bd56cdde5d76a2})}, + {false, -195, + MType({0x5bcb3818decefa22, 0x42643ced81ec14a, 0xabf1ea3e1d7bd7cf})}, + {false, -195, + MType({0x164c684a1951204a, 0xe9ed4f101c5ef101, 0xb46a757936bf7298})}, + {false, -195, + MType({0xcff88d82b3417f9b, 0xf7e2ab36f09e9013, 0xb8ae8671b3d7dd6c})}, + {false, -195, + MType({0x8a86f0b2dfd799c2, 0x8d3fc63485e7ff12, 0xbcf7dabd87c01afc})}, + {false, -195, + MType({0xf0e9563d52c1d533, 0x13d5c8cb231a53bd, 0xc59a81b26312b9da})}, + {false, -195, + MType({0xa004d184e2faf2ea, 0x5fcd7d0ce937375e, 0xc9f3ef07e1f3fc5e})}, + {false, -195, + MType({0x8a03e643ccfc8ec7, 0x58252dada9f06110, 0xce52d50b94fa253a})}, + {false, -195, + MType({0xed0ad59454d36575, 0x62f01e5ff43708aa, 0xd2b74192fae43777})}, + {false, -195, + MType({0x3af155936af69c0b, 0xb305ced1419fe924, 0xdb90e68b8abf14af})}, + {false, -195, + MType({0x77f157444abbe2fc, 0x3a330921681e2481, 0xe0063bb3912e549c})}, + {false, -195, + MType({0x693eca0a0148cc18, 0x849266a85513dc6d, 0xe48150cf32888b9c})}, + {false, -195, + MType({0x73dac2c5ab4c8eda, 0x80ecf3266b4dcf4, 0xe90234c65a15e533})}, + {false, -195, + MType({0x97079feab7423d9b, 0xe1e0dda0b3d375a3, 0xed88f6bb355fa196})}, + {false, -195, + MType({0x22b519828722ce77, 0x5dab68307fedefcd, 0xf6a852513757dfbd})}, + {false, -195, + MType({0x66278c914a763228, 0xa112379749fd91b8, 0xfb410b64e6393477})}, + {false, -195, + MType({0x12b0b091e7b0299d, 0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, + {false, -194, + MType({0x3f42ceed6e05646f, 0x18aa302171017dcb, 0x8242724a155219f3})}, + {false, -194, + MType({0xd146a4518aa0076f, 0xabc7e698502d43bf, 0x849812d0ccbb5cbd})}, + {false, -194, + MType({0x34708f4b01b4e73f, 0xc339089a51663370, 0x86f0dab1ab5822b6})}, + {false, -194, + MType({0x5e3e593143a3f512, 0x26f70b34ce5cf201, 0x894cd27d9f182c63})}, + {false, -194, + MType({0xb5f326771e3d87a9, 0x676f20a87ab433de, 0x8bac02e8ac3e09ac})}, + {false, -194, + MType({0x633dc078567bc8d9, 0x6db4169cc4b83bc3, 0x8e0e74caae062e24})}, + {false, -194, + MType({0x662d0eb20c51da4f, 0xcd3fdb2fad0d1fd6, 0x907431201c7f651a})}, + {false, -194, + MType({0x7fed29520fdf89d7, 0x49d03e163250d1d4, 0x92dd410ad7bfe103})}, + {false, -194, + MType({0xed5106d40e889904, 0x9ec7dc02d5e723b8, 0x9549add2f8a3c7e0})}, + {false, -194, + MType({0xcdbe2ebc07714f34, 0x34698d03a5442572, 0x97b980e7a743d71c})}, + {false, -194, + MType({0x1e9782e43af2d3d7, 0x522904d1e47f3de, 0x9a2cc3dff7548556})}, + {false, -194, + MType({0xfabfd5317e9bdd56, 0x791a72646c87b975, 0x9ca3807bca9fe93f})}, + {false, -194, + MType({0x2dc99e57977305af, 0x3826f190d655d736, 0x9f1dc0a4b9cea286})}, + {false, -194, + MType({0x5a4dfeb5b0db1f24, 0x544ab3e48199b299, 0xa19b8e6f03b60e45})}, + {false, -194, + MType({0x2888ece2bf37b7e9, 0xbe775fa82961114e, 0xa41cf41a83643487})}, + {false, -194, + MType({0xef4915fda0982302, 0x45798e5019e6c081, 0xa6a1fc13ad241953})}, + {false, -194, + MType({0x33a92c4634bdd6c, 0x91fb1ed0cdc4d1fb, 0xa92ab0f492b772bd})}, + {false, -194, + MType({0xe89863702c85cccb, 0x818b8b9cbbd17b71, 0xabb71d85ef05380d})}, + {false, -194, + MType({0x482ab24ae8fdf5a4, 0xa50c2fea60c5b3b2, 0xae474cc0397f0d4f})}, + {false, -194, + MType({0x682d5b55e9594eb3, 0x58ea34980ad8b720, 0xb0db49ccc1823c8e})}, + {false, -194, + MType({0xae8dceb953c096c0, 0x4b5f71941be508a3, 0xb3732006d1fbbba5})}, + {false, -194, + MType({0xfc1396a39c34fef3, 0x9e405fb8bcb1ff1d, 0xb60edafcdd99ad1d})}, + {false, -194, + MType({0xcff88d82b3417f9b, 0xf7e2ab36f09e9013, 0xb8ae8671b3d7dd6c})}, + {false, -194, + MType({0x6f1a4043a1a8a435, 0xc669639640c305bb, 0xbb522e5dbf37f63b})}, + {false, -194, + MType({0x6f1a4043a1a8a435, 0xc669639640c305bb, 0xbb522e5dbf37f63b})}, + {false, -194, + MType({0x6c4433809b0babe7, 0xa3dc9e464e98764b, 0xbdf9def04cf980ff})}, + {false, -194, + MType({0x38a1d9b9b9370d6d, 0xffd3256b59fa9c59, 0xc0a5a490dea95b5e})}, + {false, -194, + MType({0x60b092e62b5beb8a, 0xb0a2d48672a051a5, 0xc3558be085e3f4bc})}, + {false, -194, + MType({0x1065867f127536e0, 0xacb2ca5d4ca1c10e, 0xc609a1bb4aa98f59})}, + {false, -194, + MType({0x8030cfce4646062f, 0x43690b9e3cde0d01, 0xc8c1f3399ca7d33b})}, + {false, -194, + MType({0x8030cfce4646062f, 0x43690b9e3cde0d01, 0xc8c1f3399ca7d33b})}, + {false, -194, + MType({0xd806ff9947bc6ca7, 0x18b1fd60383f7e59, 0xcb7e8db1cfe04827})}, + {false, -194, + MType({0x65af114cbdb0193e, 0x248757e5f45af3d, 0xce3f7eb9a517c969})}, + {false, -194, + MType({0x3569862a1e8f9a4c, 0x7c4acd605be48bc1, 0xd104d427de7fbcc4})}, + {false, -194, + MType({0x94e3cbc5cd693dda, 0x58ff63629a92652c, 0xd3ce9c15e10ec927})}, + {false, -194, + MType({0x94e3cbc5cd693dda, 0x58ff63629a92652c, 0xd3ce9c15e10ec927})}, + {false, -194, + MType({0x1e0a73c970dbbf33, 0x6b49be3bd8c89f10, 0xd69ce4e16303fcdd})}, + {false, -194, + MType({0x59899d5040e21558, 0xe6dd603a881e9060, 0xd96fbd2e2814c9cc})}, + {false, -194, + MType({0x71549f0a4d78d49c, 0x89e281c98c1d705c, 0xdc4733e7cbcbfc8c})}, + {false, -194, + MType({0x71549f0a4d78d49c, 0x89e281c98c1d705c, 0xdc4733e7cbcbfc8c})}, + {false, -194, + MType({0xf4eee5981334e57, 0xdc0db7cf0cce9f32, 0xdf2358439aa5dd12})}, + {false, -194, + MType({0xd50afdf84e68b269, 0xfdf1c5b846db9dea, 0xe20439c27a7c01b8})}, + {false, -194, + MType({0xd95ac10b65558e44, 0x3dd7eab48869c401, 0xe4e9e832e2da0c05})}, + {false, -194, + MType({0xd95ac10b65558e44, 0x3dd7eab48869c401, 0xe4e9e832e2da0c05})}, + {false, -194, + MType({0xe9377fb1f3b453b6, 0x4e8fcc900b41daee, 0xe7d473b2e5db8f2a})}, + {false, -194, + MType({0xe772954c39d07f3b, 0x7593e1a9e9173599, 0xeac3ecb24a3ac7b4})}, + {false, -194, + MType({0xe772954c39d07f3b, 0x7593e1a9e9173599, 0xeac3ecb24a3ac7b4})}, + {false, -194, + MType({0xa6d10312a95362d4, 0xe7741396b49e1ce4, 0xedb863f4b73f982d})}, + {false, -194, + MType({0xd0d55aebaeb5abfc, 0xc8ba4f8f47b85a5b, 0xf0b1ea93f34675a7})}, + {false, -194, + MType({0xd0d55aebaeb5abfc, 0xc8ba4f8f47b85a5b, 0xf0b1ea93f34675a7})}, + {false, -194, + MType({0x7e1dea1275662695, 0x7007c1276821b705, 0xf3b09202359f9787})}, + {false, -194, + MType({0x54dc283e4f79339c, 0x7ee19afe6db7e324, 0xf6b46c0c8c8fdea1})}, + {false, -194, + MType({0x54dc283e4f79339c, 0x7ee19afe6db7e324, 0xf6b46c0c8c8fdea1})}, + {false, -194, + MType({0xf7844244016096c0, 0xedf54f37f6d4041f, 0xf9bd8add584687f0})}, + {false, -194, + MType({0x94a99151573d5249, 0xefe52ccf03e7dee0, 0xfccc00fedba4e6fb})}, + {false, -194, + MType({0x94a99151573d5249, 0xefe52ccf03e7dee0, 0xfccc00fedba4e6fb})}, + {false, -194, + MType({0x12b0b091e7b0299d, 0x1be2585c279c50a5, 0xffdfe15de3c01bac})}, + {false, -193, + MType({0xeba2ae5f7d1c7168, 0xe0b571f5c91b0445, 0x817c9fa643880404})}, + {false, -193, + MType({0xeba2ae5f7d1c7168, 0xe0b571f5c91b0445, 0x817c9fa643880404})}, + {false, -193, + MType({0x2db8874aa1eb0c3c, 0x7178594bef2def59, 0x830c17427ea55eca})}, + {false, -193, + MType({0x2db8874aa1eb0c3c, 0x7178594bef2def59, 0x830c17427ea55eca})}, + {false, -193, + MType({0xf3cb58bd1bbe04f0, 0x9a741bb171158d29, 0x849e6196487c1d1c})}, + {false, -193, + MType({0xd95b712663014da, 0x1a618264446cb495, 0x863388eb55ebd295})}, + {false, -193, + MType({0xd95b712663014da, 0x1a618264446cb495, 0x863388eb55ebd295})}, + {false, -193, + MType({0x2e66a10dfdce751, 0x71dbdbbec51d7657, 0x87cb97c3ff9eac18})}, + {false, -193, + MType({0x2e66a10dfdce751, 0x71dbdbbec51d7657, 0x87cb97c3ff9eac18})}, + {false, -193, + MType({0x84a2c0cd81dbcf53, 0xabe0b522230f7d13, 0x896698dce4cff76c})}, + {false, -193, + MType({0xae1f8a1def2acf5a, 0xd28e8adafea703b3, 0x8b04972e9d4d3011})}, + {false, -193, + MType({0xae1f8a1def2acf5a, 0xd28e8adafea703b3, 0x8b04972e9d4d3011})}, + {false, -193, + MType({0x8a02390202a4a59d, 0x208422d83be34b26, 0x8ca59def7b5cefc5})}, + {false, -193, + MType({0x8a02390202a4a59d, 0x208422d83be34b26, 0x8ca59def7b5cefc5})}, + {false, -193, + MType({0x420c16bd3939f912, 0xc385cf49402af0e4, 0x8e49b8955e3ffb8a})}, + {false, -193, + MType({0x420c16bd3939f912, 0xc385cf49402af0e4, 0x8e49b8955e3ffb8a})}, + {false, -193, + MType({0x8a1754a1ee7c990, 0xda982a614e12c6dd, 0x8ff0f2d7960a075c})}, + {false, -193, + MType({0xe77cb3d650c2718e, 0x38401fc1c1b5c2b, 0x919b58b0d999bbc8})}, + {false, -193, + MType({0xe77cb3d650c2718e, 0x38401fc1c1b5c2b, 0x919b58b0d999bbc8})}, + {false, -193, + MType({0x3c50b7234a381be8, 0xa9b55d3f16da746a, 0x9348f6614f821394})}, + {false, -193, + MType({0x3c50b7234a381be8, 0xa9b55d3f16da746a, 0x9348f6614f821394})}, + {false, -193, + MType({0xd31cd763e50a0231, 0x88d2d1473d4f7f4, 0x94f9d870aac256a5})}, + {false, -193, + MType({0xd31cd763e50a0231, 0x88d2d1473d4f7f4, 0x94f9d870aac256a5})}, + {false, -193, + MType({0x8eb2e675bc182d0d, 0x7c1e117dea19e9e5, 0x96ae0bb05c35d5bd})}, + {false, -193, + MType({0x8eb2e675bc182d0d, 0x7c1e117dea19e9e5, 0x96ae0bb05c35d5bd})}, + {false, -193, + MType({0x78c2d9cf6e98b1c1, 0x336db0630f536fb9, 0x98659d3dd9b12532})}, + {false, -193, + MType({0x78c2d9cf6e98b1c1, 0x336db0630f536fb9, 0x98659d3dd9b12532})}, + {false, -193, + MType({0x26ad30c543d1f34a, 0x8f8959ac0b7c9178, 0x9a209a84fbcff798})}, +}; + +// -log10(r) for further range reduction steps, generated by SageMath with: +// +// RR = RealField(192); +// for i in range(5): +// N = 11 + 4*i; +// print("{"); +// for j in range(-2^4, 2^4): +// r = 2^(-N) * round(2^(N) * ( 1 + (j + 0.5)*2^(-N) - 2^(-2*N-1) ) / ((1 + +// j * 2^(-N)) * (1 + (j + 1)*2^(-N)))); a = -RR(r).log10(); if j in [0, +// -1]: +// r = 1; a = RR(0); +// s, m, e = a.sign_mantissa_exponent() +// sgn = "{false," if s == 1 else "{true,"; +// print(sgn, e, ", MType({", hex(m % 2^64), ",", hex((m >> 64) % 2^64), +// ",", hex((m >> 128) % 2^64), "})},"); +// print("},"); +const Float192 LOG10_RR[R_STEPS][R_SIZES] = { + { + {true, -200, + MType({0xf52b7aea9ca0c476, 0xdd4a47e1490df56, 0xdd7ea3910f69332e})}, + {true, -200, + MType({0x11c54af8b7b5ac0, 0xbe14368ead9df21c, 0xcfb39f5a164f371c})}, + {true, -200, + MType({0xdc8b8e1f46c98b22, 0xb46a6050fcd513ca, 0xc1e6e4dcf45e0ee0})}, + {true, -200, + MType({0x10c91fa8440cf6a8, 0x799e2aecea385841, 0xb41873accfbaba29})}, + {true, -200, + MType({0xefff2920f96d3591, 0x3440eb8005ad171b, 0xa6484b5ca5f7eaea})}, + {true, -200, + MType({0xaa51f382c80587cb, 0x21e85464267c47a4, 0x98766b7f4c01d932})}, + {true, -200, + MType({0xaf1454629688e40a, 0xee383c5f131898c4, 0x8aa2d3a76e0a0a79})}, + {true, -201, + MType({0xf128ae4671df433, 0x15022fc12f1747d4, 0xf99b06cf1ee618b9})}, + {true, -201, + MType({0x511d8d64285691ec, 0x411e70646c13ae93, 0xddecf4a415784564})}, + {true, -201, + MType({0xd9e8330f453fac1c, 0x662bb5e078c532bf, 0xc23b6ff222d9d207})}, + {true, -201, + MType({0xa9d37fde93b3c989, 0xebcfc062f09deb23, 0xa68677dd5801ce9b})}, + {true, -201, + MType({0xe9eadfa1556bcaec, 0xbfdd2c7f388fc013, 0x8ace0b8973a63413})}, + {true, -202, + MType({0x743a88400b3b04bc, 0xc21595e745f1fa15, 0xde245433c425b5c5})}, + {true, -202, + MType({0xf50e04c0bd73e444, 0x663cf2b27e8f1ffa, 0xa6a5a5637a00afdc})}, + {true, -203, + MType({0xf860837f66c35a6a, 0xba0324530edaa03e, 0xde4011cf2daaff31})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, -204, + MType({0x8319f4e45b099e86, 0x20fa8a73c585f633, 0xde69bf8f58005dfc})}, + {false, -203, + MType({0x58e9d6dee00a8fc3, 0x35e3298f4bb2aa0a, 0xde77a8c714b08d28})}, + {false, -202, + MType({0xc6b4c904be2cbf01, 0xa1e66c6203725d4f, 0xa6e42f3ccf49959d})}, + {false, -202, + MType({0xebc8b4278d3f93e9, 0x5ad3480ccfbe988f, 0xde93822dfe812587})}, + {false, -201, + MType({0x9e52f5c983d9caa4, 0xe5f7c52cdf119d4, 0x8b24e77b0cb60a84})}, + {false, -201, + MType({0x9a29985625bf9936, 0x84e42dd54c2ff44e, 0xa7038baa64c5cd39})}, + {false, -201, + MType({0x15c143896f774901, 0xde013ef4ba5abcfa, 0xc2e5ae853064a921})}, + {false, -201, + MType({0x8c6b1043496971e, 0x8f6c475cc382401c, 0xdecb50ebece5e146})}, + {false, -201, + MType({0x95457befdc15d5a7, 0x64a67faf3f1c74c2, 0xfab473bf6c2589b9})}, + {false, -200, + MType({0x825120e3dbb4f086, 0x341aa1df5e8fab33, 0x8b508bf06a597f35})}, + {false, -200, + MType({0xc69ade976589376a, 0x8b19159a7036ac6c, 0x99489f18d0fdba55})}, + {false, -200, + MType({0x132b76e2a5ed5ce1, 0x7b8b2852580ae3e2, 0xa74273c9d23a53b9})}, + {false, -200, + MType({0x6f0c9495185af64, 0xe9ad89ebfeaf5a02, 0xb53e0a7480e3cf3c})}, + {false, -200, + MType({0x3dc11a3e41e693ee, 0xe12996e107931f4b, 0xc33b638a1a7dc81d})}, + {false, -200, + MType({0x2ce27c62f7c5d30e, 0xb43caf8e7b891066, 0xd13a7f7c07506f7d})}, + }, + { + {true, -204, + MType({0xea57bded8e15fa61, 0x634cea6750617a91, 0xde4df4140b42822f})}, + {true, -204, + MType({0x8a2e8bb53b241b41, 0x9185547f24195fab, 0xd069e52741db93b6})}, + {true, -204, + MType({0x265b41c72630e265, 0xfbd7e970aef9dbb8, 0xc285ba757feb2781})}, + {true, -204, + MType({0xd2e480f950aea396, 0xe820a1a6f319285a, 0xb4a173fe56691147})}, + {true, -204, + MType({0xb3300685a98f7031, 0x9aedc1c1ba7d0694, 0xa6bd11c1564a8ace})}, + {true, -204, + MType({0x407aba2445f75697, 0x89de4989587519ca, 0x98d893be108233d7})}, + {true, -204, + MType({0xbe4f7e3cbefc71ce, 0x8d306ba207233c43, 0x8af3f9f41600120a})}, + {true, -205, + MType({0x5449f3ae334e8294, 0x2100088006aeff92, 0xfa1e88c5ef6321c2})}, + {true, -205, + MType({0x2245be0033c6dafe, 0x856a0a3a00fcf3c1, 0xde54e6148d030322})}, + {true, -205, + MType({0xf8b7f31304662b13, 0x8a4399de0be1c455, 0xc28b0bd326b035f2})}, + {true, -205, + MType({0x5abacdedb1dc0b2c, 0xb3a2c1407cf6d38d, 0xa6c0fa00de35f314})}, + {true, -205, + MType({0x874695793b5a312e, 0x220af90d74ea17c7, 0x8af6b09cd55a3e68})}, + {true, -206, + MType({0x70a9c05700a3f2c5, 0xd791cf6a70c3a504, 0xde585f4c5bbbcd3d})}, + {true, -206, + MType({0x1f465fbffe379fdf, 0xe95b52a2781c25e3, 0xa6c2ee3812f90a28})}, + {true, -207, + MType({0x35af2ed67a089e4, 0x10a633f2c4a8ea22, 0xde5a1bf627b1f68f})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, -208, + MType({0xe0faf0a837063878, 0x140a2bff40e0d66f, 0xde5cb706384ddbaf})}, + {false, -207, + MType({0xd660ff616f334614, 0xed4a68e5e6e83dde, 0xde5d95658a729eab})}, + {false, -206, + MType({0x542eeb05597ecdea, 0x7c9dc000df79edde, 0xa6c6d6d56238dd6e})}, + {false, -206, + MType({0x5f8e2902ab25d093, 0x3281f1872cdbee94, 0xde5f522b21e3e25a})}, + {false, -205, + MType({0xba7cb3452f07c347, 0x292ef5f6fbe96c00, 0x8afc1e5ae08b4643})}, + {false, -205, + MType({0xde1cc00ce5b7249c, 0xf1466edaa96e356d, 0xa6c8cb3b7e5bbbfd})}, + {false, -205, + MType({0xa521ef720e08d8fe, 0x936a112cb6265f7e, 0xc295afb848dbd759})}, + {false, -205, + MType({0x1d1317f34a2fba9e, 0x8a607fd695dfc3d9, 0xde62cbd21e895473})}, + {false, -205, + MType({0x9c74fa60d5502c53, 0x65a49326981bdf3f, 0xfa301f89dde726b4})}, + {false, -204, + MType({0x4295ea162700a15d, 0xc36b8713ceefe2de, 0x8afed57032bebc7c})}, + {false, -204, + MType({0x10e84795ee07f1c, 0x7068ee550f5ffb37, 0x98e5b6eb49ecd6df})}, + {false, -204, + MType({0xf71013dc630dfafa, 0x5c2e76c953e3e3e5, 0xa6ccb436a3c72fa4})}, + {false, -204, + MType({0x61fc137c9dce9644, 0xb3425e818888a098, 0xb4b3cd52af99afe6})}, + {false, -204, + MType({0xe923dcabcfbadd8e, 0x8e4950fa5c943bbe, 0xc29b023fdcb2dccf})}, + {false, -204, + MType({0xd941d748e2a23bcc, 0xd4d3796d93d6750b, 0xd08252fe9a63d7aa})}, + }, + { + {true, -208, + MType({0x22058fdb723bb37, 0xce31a0d27359396f, 0xde5afa4e86f7f3ee})}, + {true, -208, + MType({0x2e22d8a1e7a512a1, 0x9f63aaa563e9a399, 0xd07557b0de924142})}, + {true, -208, + MType({0xd1fffbb5f7c92f10, 0x67e63bbe1405c20c, 0xc28fb35684fedaab})}, + {true, -208, + MType({0x2473a71714b9ef53, 0x5440d7a131392da8, 0xb4aa0d3f79ce9499})}, + {true, -208, + MType({0xdec055ae33e59ad5, 0xe0e635a681d259e1, 0xa6c4656bbc924352})}, + {true, -208, + MType({0x953365af70272280, 0xda1f690c752fdefe, 0x98debbdb4cdabaf4})}, + {true, -208, + MType({0xc6c161d92abe9aa4, 0x5bf708fead2b177f, 0x8af9108e2a38cf72})}, + {true, -209, + MType({0x78732843e6506315, 0xa448b11f012c975c, 0xfa26c708a87aa929})}, + {true, -209, + MType({0x863d53d84705d0ae, 0xefecdd48ed894c31, 0xde5b697b94f23bf7})}, + {true, -209, + MType({0x87704029104c21a9, 0xb07ebbab782457af, 0xc290087518f9fe3b})}, + {true, -209, + MType({0x4adb1df9649c50c1, 0x9a7eb8811ec3e6bb, 0xa6c4a3f533b3968d})}, + {true, -209, + MType({0xcfe3880042e86013, 0x11fd6995111a926, 0x8af93bfbe440ab33})}, + {true, -210, + MType({0x2cbf7a2e7b9cbcf0, 0xac3bfd925e6b33e1, 0xde5ba1125385c43b})}, + {true, -210, + MType({0x4d418d19762cbd66, 0x53289e84744549cb, 0xa6c4c33a06b7c1d9})}, + {true, -211, + MType({0x4c3b3ad870a5dd12, 0xa74dab3bd6067bc7, 0xde5bbcddc0b533aa})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, -212, + MType({0x29423084899fc9b9, 0xbee710a5ace7c8d4, 0xde5be68ef5db7f99})}, + {false, -211, + MType({0x1c57cf04091e59cc, 0x7b644b13993cf4ef, 0xde5bf474b6df8331})}, + {false, -210, + MType({0xd259a65cb34a4a8a, 0x64136e97019d0a3a, 0xa6c501c3dba75dc2})}, + {false, -210, + MType({0x6f497708cc18c4d8, 0xa2912e03afc8cc28, 0xde5c10403fda6db5})}, + {false, -209, + MType({0x865e042dd50e1d3a, 0xbf5dccd967504856, 0x8af992d7c4e2d5b5})}, + {false, -209, + MType({0xe1702b0f17301f13, 0xe22978efa7a9629f, 0xa6c52108dd92e8c1})}, + {false, -209, + MType({0x74a88aedf1375fb4, 0x82cd723bf1524680, 0xc290b2b36adbcda2})}, + {false, -209, + MType({0xa923a00c40053158, 0x4dd6f8576e9188b7, 0xde5c47d76d9be24e})}, + {false, -209, + MType({0x923113e7e6eec47e, 0x5368655f1ce3110a, 0xfa27e074e6b1850f})}, + {false, -208, + MType({0xee64cf0bae6dd4c9, 0x83b16ff7eecace8b, 0x8af9be45eb7d8a41})}, + {false, -208, + MType({0x782dbb6881f2d3d, 0x20c8069e4ae400de, 0x98df8e0e1fab77cd})}, + {false, -208, + MType({0xff9e92ff9157f7ff, 0x9ee5e19f2eecdb54, 0xa6c55f931051bacc})}, + {false, -208, + MType({0x4260c63c04224e80, 0xce16dd7f60311bf6, 0xb4ab32d4bddf830b})}, + {false, -208, + MType({0xbf40b2a151280765, 0x3099a17e0461648b, 0xc29107d328c40080})}, + {false, -208, + MType({0xb32695665b8f1329, 0xfaf478d3d0b312ff, 0xd076de8e516e6348})}, + }, + { + {true, -212, + MType({0xdaf98ea8088ccd55, 0x19be3fabd93832c4, 0xde5bcac37ac6587d})}, + {true, -212, + MType({0x92832d76e51450a5, 0x88a86b30027dc8a5, 0xd0760ee7b9136936})}, + {true, -212, + MType({0xdd67de1487eb3913, 0xc61c028b8f230f14, 0xc29052f02bebe878})}, + {true, -212, + MType({0x8b5c1ba51e27536f, 0xb8cfce8fa982ea7, 0xb4aa96dcd34f6716})}, + {true, -212, + MType({0xe1a1b61dba0d85b7, 0x8fd43f0c9ce444d2, 0xa6c4daadaf3d75e0})}, + {true, -212, + MType({0x60e206e644f9e560, 0x872f9b3fd2141246, 0x98df1e62bfb5a5aa})}, + {true, -212, + MType({0xeef14d02362d0513, 0x2341d13c21a7d8cf, 0x8af961fc04b78746})}, + {true, -213, + MType({0xb56b5c514a13e254, 0x26251c2ccc00d194, 0xfa274af2fc85570b})}, + {true, -213, + MType({0x60d8b357006a3fed, 0x61cd853e796bc2c, 0xde5bd1b658ad4676})}, + {true, -213, + MType({0xa90e9309cbc1b814, 0x3a0de60782dd1939, 0xc29058421de5fe71})}, + {true, -213, + MType({0xb6bc3ab198cec5a2, 0x10652e9b1ce538bb, 0xa6c4de964c2ea0a1})}, + {true, -213, + MType({0x27d91560ba3c3289, 0xd259757215d9aa0d, 0x8af964b2e3864ea9})}, + {true, -214, + MType({0xef6374ae583f4c18, 0x87d6afabfba0644e, 0xde5bd52fc7d8545f})}, + {true, -214, + MType({0x4bf40373e818d7f5, 0x47ca9999c73441f5, 0xa6c4e08a9abea9ae})}, + {true, -215, + MType({0x256483d247de2b54, 0xaf6e93be8e4c1764, 0xde5bd6ec7f7bc110})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, -216, + MType({0x5f6593112c03bf0b, 0xd5c0ebcf0fa0221e, 0xde5bd98793024346})}, + {false, -215, + MType({0x11b7b6679d212c54, 0x6d0063a923dd0cc6, 0xde5bda65eede65ed})}, + {false, -214, + MType({0x6079e21db1a5b651, 0xc424c8976e2ebcfa, 0xa6c4e473380da326})}, + {false, -214, + MType({0x8a7f3f059746be25, 0xa9bf32001043629c, 0xde5bdc22a69d9e19})}, + {false, -213, + MType({0xc8dbe969fc9bb1cd, 0x9e8e77fa22e00248, 0x8af96a20a1907043})}, + {false, -213, + MType({0xe129bb635500e8db, 0xaf3be9e7aa56d682, 0xa6c4e66786cc9393})}, + {false, -213, + MType({0xea8e87193d810fa0, 0xfc3aff5aaf056566, 0xc29062e603041758})}, + {false, -213, + MType({0x254d7de51cd5637d, 0x8014f0f360272d82, 0xde5bdf9c1637d9ef})}, + {false, -213, + MType({0xe9795a136f5856c, 0x3a891f89bcf72a40, 0xfa275c89c068b9b3})}, + {false, -212, + MType({0x3c5338931e9d2409, 0x18468a2ba2fa5549, 0x8af96cd780cbca80})}, + {false, -212, + MType({0x3ab6a381bda506d5, 0x362640905714e443, 0x98df2b85ece2a519})}, + {false, -212, + MType({0x13b57109141a8764, 0xfe94a02fc639c0e3, 0xa6c4ea5024795bd2})}, + {false, -212, + MType({0xc613ec4aff62b164, 0x7bddaab60665b883, 0xb4aaa93627905ddb})}, + {false, -212, + MType({0x29f60bd76dda9de7, 0xbae8765350c99434, 0xc2906837f6281a60})}, + {false, -212, + MType({0x6d1b70f85e1ebdd6, 0xcb372dd0da70965b, 0xd076275590410090})}, + }, + { + {true, -216, + MType({0xcf260846db91386b, 0x81645201b36e17b9, 0xde5bd7cadb50f0d8})}, + {true, -216, + MType({0x8f96908bc0be4701, 0xcdea7b1a0dc7ad41, 0xd0761a5b34fd720c})}, + {true, -216, + MType({0x71e2e36ca8d985f3, 0x5ce6604b0911c5ed, 0xc2905ce9d1f24872})}, + {true, -216, + MType({0xdaf377e6a6fc292b, 0x6e0982bd8d96ee, 0xb4aa9f76b22f739a})}, + {true, -216, + MType({0xf97df5dd10eab18e, 0x8a9754fe0c0073a6, 0xa6c4e201d5b4f314})}, + {true, -216, + MType({0x260badb439a3192f, 0xcd77f7489d9ef50b, 0x98df248b3c82c672})}, + {true, -216, + MType({0x35f8aeb949a552f2, 0x9b257b3ce3f82109, 0x8af96712e698ed45})}, + {true, -217, + MType({0x82d6f89adeda85fe, 0x8b6a840831c123d7, 0xfa275331a7eece3b})}, + {true, -217, + MType({0xb88ac395452a99d1, 0x3e79062c7cbb3b7c, 0xde5bd83a093c6718})}, + {true, -217, + MType({0x8416ca30683117d1, 0xf3a098743d20fb63, 0xc2905d3ef11aa442})}, + {true, -217, + MType({0xe98fafcacc5781ed, 0x4f0b030a9742eaff, 0xa6c4e2405f8984dd})}, + {true, -217, + MType({0x9dc90f008c47f3e0, 0xf4e1bab83f55f5a0, 0x8af9673e54890808})}, + {true, -218, + MType({0x1ff62cfc0b87b23a, 0x129bc1c6f293726e, 0xde5bd871a03259cf})}, + {true, -218, + MType({0xb2b2c5c7e8e3dd1b, 0x60f08720313daa3e, 0xa6c4e25fa473e535})}, + {true, -219, + MType({0x4a6c56a5be458d59, 0x3a25757e00f4e3a0, 0xde5bd88d6bad6110})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, 0, MType({0x0, 0x0, 0x0})}, + {false, -220, + MType({0x3f16581bde8d9c4a, 0x225916c2b3f33c90, 0xde5bd8b71ce5fd51})}, + {false, -219, + MType({0x8d1575503bb9c7fd, 0x44397830931fddc, 0xde5bd8c502a38b5e})}, + {false, -218, + MType({0x91d4a95df896a79f, 0xe454dec82bde52e4, 0xa6c4e29e2e48d4cc})}, + {false, -218, + MType({0x242c88441091cf56, 0xa6e2721f2afc3cce, 0xde5bd8e0ce1eae6a})}, + {false, -217, + MType({0x9b6ef45b7cbebc03, 0x80bf0ff2f6cd9f92, 0x8af967953069aa22})}, + {false, -217, + MType({0x7fe96798a2b597a7, 0x55ee1480619827c4, 0xa6c4e2bd7333640c})}, + {false, -217, + MType({0xedc6a1463ce32849, 0x2ed8ba8c6fa81c97, 0xc2905de92f6c85d1})}, + {false, -217, + MType({0xd59f31668df9feb7, 0x6759c94e2d017fac, 0xde5bd9186515104f})}, + {false, -217, + MType({0x337d83d075e0c1ab, 0x5b4c5b5f180b9fe1, 0xfa27544b142d0465})}, + {false, -216, + MType({0x390379ad9871c8a7, 0xb345ef5d90dd6545, 0x8af967c09e5a3178})}, + {false, -216, + MType({0x73d9d08521f9533d, 0xf27a0a6056dcfe57, 0x98df255d6f559668})}, + {false, -216, + MType({0xb70a87a69080a500, 0x99308918494a4a1f, 0xa6c4e2fbfd08b172})}, + {false, -216, + MType({0x8995c0b3074c289d, 0xd5579f970cf000a8, 0xb4aaa09c47738304})}, + {false, -216, + MType({0x72650f85eed7af38, 0xd4ddab9f8032bbab, 0xc2905e3e4e960b8e})}, + {false, -216, + MType({0xeb04ee2cfc701a37, 0xc5b134a5bb25cf2d, 0xd0761be212704b7f})}, + }, +}; + +// > P = fpminimax(log10(1 + x)/x, 4, [|192...|], [-2^-27, 2^-27]); +// > P; +// > dirtyinfnorm(log10(1 + x)/x - P, [-2^-27, 2^-27]); +// 0x1.287a7...p-143 +const Float192 BIG_COEFFS[5]{ + {false, -194, + MType({0x1fc14ee0c0158d73, 0x762ec7601912bf70, 0x58f189dd49436234})}, + {true, -195, + MType({0x8f42a80e947f6357, 0x67836140b941fe04, 0xde5bd8a93728747a})}, + {false, -194, + MType({0xf6f00690b1fa8ba9, 0x78e7c71fc8ca2d3a, 0x943d3b1b7a1af663})}, + {true, -191, + MType({0x255a69358264a2d1, 0xa6ab7555f5a64d33, 0x1bcb7b1526e50e32})}, + {false, -193, + MType({0x3ee3460246fdf301, 0x355baaafad33dc32, 0xde5bd8a937287195})}, +}; + +// Reuse the output of the fast pass range reduction. +// |m_x| < 2^-7 +double log10_accurate(int e_x, int index, double m_x) { + Float192 e_x_f192(static_cast(e_x)); + Float192 sum = fputil::quick_add(LOG10_R_F192[index], + fputil::quick_mul(LOG10_2, e_x_f192)); + + fputil::DoubleDouble mx{/*lo*/ 0.0, /*hi*/ m_x}; + + // Further range reductions. + int idx[R_STEPS]; + double scale = 0x1.0p+7; + const fputil::DyadicFloat<128> NEG_ONE(-1.0); + for (size_t i = 0; i < R_STEPS; ++i) { + scale *= 0x1.0p+4; + int id = static_cast(fputil::multiply_add(mx.hi, scale, 0x1.0p+4)); + idx[i] = id; + double r = RR[i][id]; + fputil::DoubleDouble rm = fputil::exact_mult(r, mx.hi); + rm.hi += r - 1.0; + rm.lo = fputil::multiply_add(r, mx.lo, rm.lo); + mx = fputil::exact_add(rm.hi, rm.lo); + sum = fputil::quick_add(sum, LOG10_RR[i][id]); + } + // Now |m_x| <= 2^-27 + Float192 m_hi(mx.hi); + Float192 m_lo(mx.lo); + Float192 m = fputil::quick_add(m_hi, m_lo); + Float192 p = fputil::quick_mul(m, BIG_COEFFS[0]); + + for (size_t i = 1; i < 5; ++i) { + auto aa = fputil::quick_add(p, BIG_COEFFS[i]); + p = fputil::quick_mul(m, aa); + }; + + return static_cast(fputil::quick_add(sum, p)); +} + +} // namespace + +// TODO(lntue): Make the implementation correctly rounded for non-FMA targets. +LLVM_LIBC_FUNCTION(double, log10, (double x)) { + using FPBits_t = typename fputil::FPBits; + FPBits_t xbits(x); + int x_e = -1023; + + if (unlikely(xbits.uintval() < FPBits_t::MIN_NORMAL || + xbits.uintval() > FPBits_t::MAX_NORMAL)) { + if (xbits.is_zero()) { + // return -Inf and raise FE_DIVBYZERO. + return -1.0 / 0.0; + } + if (xbits.get_sign() && !xbits.is_nan()) { + return FPBits_t::build_quiet_nan(0); + } + if (xbits.is_inf_or_nan()) { + return x; + } + // Normalize denormal inputs. + xbits.set_val(x * 0x1.0p52); + x_e -= 52; + } + + // log10(x) = log10(2^x_e * x_m) + // = x_e * log10(2) + log10(x_m) + + // Range reduction for log10(x_m): + // For each x_m, we would like to find R such that: + // |R * x_m - 1| < C + uint64_t x_u = xbits.uintval(); + int shifted = x_u >> 45; + size_t index = shifted & 0x7F; + double r = R[index]; + + x_e += (x_u >> 52) & 0x7FF; + double e_x = static_cast(x_e); + + int e_err = (e_x == -1) && (index == 0x7F); + int logr_err = (index == 0); + + double err = + fputil::multiply_add(e_x, LOG10_2_ULP[e_err], LOG10_R_ULP[logr_err]); + + // hi is exact + double hi = fputil::multiply_add(e_x, LOG10_2_HI, LOG10_R[index].hi); + // lo errors ~ e_x * LSB(LOG10_2_LO) + LSB(LOG10_R[index].lo) + rounding err + // <= 2 * (e_x * LSB(LOG10_2_LO) + LSB(LOG10_R[index].lo)) + double lo = fputil::multiply_add(e_x, LOG10_2_LO, LOG10_R[index].lo); + // A bound on the error is given + // in "Note on FastTwoSum with Directed Roundings" + // by Paul Zimmermann, https://hal.inria.fr/hal-03798376, 2022. + // Theorem 1 says that + // the difference between a+b and hi+lo is bounded by 2u^2|a+b| + // and also by 2u^2|hi|. Here u=2^-53, thus we get: + // |(a+b)-(hi+lo)| <= 2^-105 min(|a+b|,|hi|) + // So the overall errors <= 2^-105 min(|a+b|, |hi|) + 2*(...) + fputil::DoubleDouble rr = fputil::exact_add(hi, lo); + + uint64_t x_m = (x_u & 0x000F'FFFF'FFFF'FFFFULL) | 0x3FF0'0000'0000'0000ULL; + double m = FPBits_t(x_m).get_val(); + + double u = fputil::multiply_add(r, m, -1.0); // exact + err = fputil::multiply_add(u, P_ERR, err); + + // Degree-7 minimax polynomial + double u_sq = u * u; + double p0 = u * COEFFS[0]; + double p1 = fputil::multiply_add(u, COEFFS[2], COEFFS[1]); + double p2 = fputil::multiply_add(u, COEFFS[4], COEFFS[3]); + double p3 = fputil::multiply_add(u, COEFFS[6], COEFFS[5]); + double p01 = fputil::multiply_add(u_sq, p1, p0); + double p23 = fputil::multiply_add(u_sq, p3, p2); + double u4 = u_sq * u_sq; + fputil::DoubleDouble re = fputil::exact_add(rr.hi, p01); + double ll = fputil::multiply_add(u4, p23, re.lo + rr.lo); + // Lower bound from the result + double left = re.hi + (ll - err); + // Upper bound from the result + double right = re.hi + (ll + err); + + // Ziv's test if fast pass is accurate enough. + if (left == right) + return left; + + // Exact cases: + switch (x_u) { + case 0x3ff0000000000000: // x = 1.0 + return 0.0; + case 0x4024000000000000: // x = 10.0 + return 1.0; + case 0x4059000000000000: // x = 10^2 + return 2.0; + case 0x408f400000000000: // x = 10^3 + return 3.0; + case 0x40c3880000000000: // x = 10^4 + return 4.0; + case 0x40f86a0000000000: // x = 10^5 + return 5.0; + case 0x412e848000000000: // x = 10^6 + return 6.0; + case 0x416312d000000000: // x = 10^7 + return 7.0; + case 0x4197d78400000000: // x = 10^8 + return 8.0; + case 0x41cdcd6500000000: // x = 10^9 + return 9.0; + case 0x4202a05f20000000: // x = 10^10 + return 10.0; + case 0x42374876e8000000: // x = 10^11 + return 11.0; + case 0x426d1a94a2000000: // x = 10^12 + return 12.0; + case 0x42a2309ce5400000: // x = 10^13 + return 13.0; + case 0x42d6bcc41e900000: // x = 10^14 + return 14.0; + case 0x430c6bf526340000: // x = 10^15 + return 15.0; + case 0x4341c37937e08000: // x = 10^16 + return 16.0; + case 0x4376345785d8a000: // x = 10^17 + return 17.0; + case 0x43abc16d674ec800: // x = 10^18 + return 18.0; + case 0x43e158e460913d00: // x = 10^19 + return 19.0; + case 0x4415af1d78b58c40: // x = 10^20 + return 20.0; + case 0x444b1ae4d6e2ef50: // x = 10^21 + return 21.0; + case 0x4480f0cf064dd592: // x = 10^22 + return 22.0; + } + + return log10_accurate(x_e, index, u); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/log10.h b/libc/src/math/log10.h new file mode 100644 --- /dev/null +++ b/libc/src/math/log10.h @@ -0,0 +1,18 @@ +//===-- Implementation header for log10 -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_LOG10_H +#define LLVM_LIBC_SRC_MATH_LOG10_H + +namespace __llvm_libc { + +double log10(double x); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_LOG10_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1317,6 +1317,23 @@ libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + log10_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + log10_test.cpp + DEPENDS + libc.include.errno + libc.src.errno.errno + libc.include.math + libc.src.math.log10 + libc.src.__support.FPUtil.fp_bits + FLAGS + FMA_OPT__ONLY +) + add_fp_unittest( log10f_test NEED_MPFR diff --git a/libc/test/src/math/log10_test.cpp b/libc/test/src/math/log10_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/math/log10_test.cpp @@ -0,0 +1,125 @@ +//===-- Unittests for log10 -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/log10.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/FPMatcher.h" +#include "utils/UnitTest/Test.h" +#include "utils/testutils/StreamWrapper.h" +#include + +#include +#include + +namespace mpfr = __llvm_libc::testing::mpfr; +auto outs = __llvm_libc::testutils::outs(); + +DECLARE_SPECIAL_CONSTANTS(double) + +TEST(LlvmLibcLog10Test, SpecialNumbers) { + EXPECT_FP_EQ(aNaN, __llvm_libc::log10(aNaN)); + EXPECT_FP_EQ(inf, __llvm_libc::log10(inf)); + EXPECT_TRUE(FPBits(__llvm_libc::log10(neg_inf)).is_nan()); + EXPECT_FP_EQ(neg_inf, __llvm_libc::log10(0.0)); + EXPECT_FP_EQ(neg_inf, __llvm_libc::log10(-0.0)); + EXPECT_TRUE(FPBits(__llvm_libc::log10(-1.0)).is_nan()); + EXPECT_FP_EQ(zero, __llvm_libc::log10(1.0)); +} + +TEST(LlvmLibcLog10Test, TrickyInputs) { + constexpr int N = 27; + constexpr uint64_t INPUTS[N] = { + 0x3ff0000000000000, // x = 1.0 + 0x4024000000000000, // x = 10.0 + 0x4059000000000000, // x = 10^2 + 0x408f400000000000, // x = 10^3 + 0x40c3880000000000, // x = 10^4 + 0x40f86a0000000000, // x = 10^5 + 0x412e848000000000, // x = 10^6 + 0x416312d000000000, // x = 10^7 + 0x4197d78400000000, // x = 10^8 + 0x41cdcd6500000000, // x = 10^9 + 0x4202a05f20000000, // x = 10^10 + 0x42374876e8000000, // x = 10^11 + 0x426d1a94a2000000, // x = 10^12 + 0x42a2309ce5400000, // x = 10^13 + 0x42d6bcc41e900000, // x = 10^14 + 0x430c6bf526340000, // x = 10^15 + 0x4341c37937e08000, // x = 10^16 + 0x4376345785d8a000, // x = 10^17 + 0x43abc16d674ec800, // x = 10^18 + 0x43e158e460913d00, // x = 10^19 + 0x4415af1d78b58c40, // x = 10^20 + 0x444b1ae4d6e2ef50, // x = 10^21 + 0x4480f0cf064dd592, // x = 10^22 + 0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, + 0x225e7812faadb32f, 0x3fee1076964c2903, + }; + for (int i = 0; i < N; ++i) { + double x = double(FPBits(INPUTS[i])); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x, + __llvm_libc::log10(x), 0.5); + } +} + +TEST(LlvmLibcLog10Test, InDoubleRange) { + constexpr uint64_t COUNT = 1234561; + constexpr uint64_t STEP = 0x3FF0'0000'0000'0000ULL / COUNT; + + auto test = [&](mpfr::RoundingMode rounding_mode) { + mpfr::ForceRoundingMode __r(rounding_mode); + uint64_t fails = 0; + uint64_t count = 0; + uint64_t cc = 0; + double mx, mr = 0.0; + double tol = 0.5; + + for (uint64_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + double x = FPBits(v).get_val(); + if (isnan(x) || isinf(x) || x < 0.0) + continue; + errno = 0; + double result = __llvm_libc::log10(x); + ++cc; + if (isnan(result)) + continue; + + ++count; + // ASSERT_MPFR_MATCH(mpfr::Operation::Log10, x, result, 0.5); + if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log10, x, result, + 0.5, rounding_mode)) { + ++fails; + while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log10, x, + result, tol, rounding_mode)) { + mx = x; + mr = result; + tol *= 2.0; + } + } + } + outs << " Log10 failed: " << fails << "/" << count << "/" << cc + << " tests.\n"; + outs << " Max ULPs is at most: " << tol << ".\n"; + if (fails) { + EXPECT_MPFR_MATCH(mpfr::Operation::Log10, mx, mr, 0.5, rounding_mode); + } + }; + + outs << " Test Rounding To Nearest...\n"; + test(mpfr::RoundingMode::Nearest); + + outs << " Test Rounding Downward...\n"; + test(mpfr::RoundingMode::Downward); + + outs << " Test Rounding Upward...\n"; + test(mpfr::RoundingMode::Upward); + + outs << " Test Rounding Toward Zero...\n"; + test(mpfr::RoundingMode::TowardZero); +}