diff --git a/libc/docs/math.rst b/libc/docs/math.rst --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -166,7 +166,7 @@ log1p |check| log2 |check| sin |check| large -sincos 0.776 ULPs large +sincos |check| large sinh |check| sqrt |check| |check| |check| tanh |check| @@ -232,6 +232,8 @@ +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | sinf | 13 | 25 | 54 | 57 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ +| sincosf | 20 | 30 | 62 | 68 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | ++--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | sinhf | 23 | 64 | 73 | 141 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | tanhf | 25 | 59 | 95 | 125 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -46,14 +46,26 @@ libc.src.errno.errno ) -add_object_library( +add_header_library( + range_reduction + HDRS + range_reduction.h + range_reduction_fma.h + DEPENDS + libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.fma + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer +) + +add_header_library( sincosf_utils HDRS sincosf_utils.h - SRCS - sincosf_data.cpp DEPENDS - .math_utils + .range_reduction + libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.polyeval ) add_entrypoint_object( @@ -62,16 +74,13 @@ cosf.cpp HDRS ../cosf.h - range_reduction.h - range_reduction_fma.h DEPENDS - .common_constants + .sincosf_utils libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.fputil libc.src.__support.FPUtil.fma libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval COMPILE_OPTIONS -O3 @@ -83,16 +92,14 @@ sinf.cpp HDRS ../sinf.h - range_reduction.h - range_reduction_fma.h DEPENDS - .common_constants + .range_reduction + .sincosf_utils libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.fputil libc.src.__support.FPUtil.fma libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval COMPILE_OPTIONS -O3 @@ -105,9 +112,14 @@ HDRS ../sincosf.h DEPENDS + .range_reduction .sincosf_utils libc.include.math libc.src.errno.errno + libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.fma + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval COMPILE_OPTIONS -O3 ) diff --git a/libc/src/math/generic/common_constants.h b/libc/src/math/generic/common_constants.h --- a/libc/src/math/generic/common_constants.h +++ b/libc/src/math/generic/common_constants.h @@ -31,18 +31,13 @@ // > for i from 0 to 127 do { D(exp(i / 128)); }; extern const double EXP_M2[128]; -// Lookup table for sin(k * pi / 16) with k = 0, ..., 31. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for k from 0 to 31 do { D(sin(k * pi/16)); }; -extern const double SIN_K_PI_OVER_16[32]; - static constexpr int EXP_bits_p = 5; static constexpr int EXP_num_p = 1 << EXP_bits_p; // Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27] // printf("%.13a,\n", d[i]); extern const double EXP_2_POW[EXP_num_p]; + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H diff --git a/libc/src/math/generic/common_constants.cpp b/libc/src/math/generic/common_constants.cpp --- a/libc/src/math/generic/common_constants.cpp +++ b/libc/src/math/generic/common_constants.cpp @@ -240,21 +240,4 @@ 0x1.13821818624b4p-2, 0x1.2ff6b54d8a89cp-2, 0x1.4d0ad5a753e07p-2, 0x1.6ac1f752150a5p-2, 0x1.891fac0e95613p-2}; -// Lookup table for sin(k * pi / 16) with k = 0, ..., 31. -// Table is generated with Sollya as follow: -// > display = hexadecimal; -// > for k from 0 to 31 do { D(sin(k * pi/16)); }; -const double SIN_K_PI_OVER_16[32] = { - 0x0.0000000000000p+0, 0x1.8f8b83c69a60bp-3, 0x1.87de2a6aea963p-2, - 0x1.1c73b39ae68c8p-1, 0x1.6a09e667f3bcdp-1, 0x1.a9b66290ea1a3p-1, - 0x1.d906bcf328d46p-1, 0x1.f6297cff75cb0p-1, 0x1.0000000000000p+0, - 0x1.f6297cff75cb0p-1, 0x1.d906bcf328d46p-1, 0x1.a9b66290ea1a3p-1, - 0x1.6a09e667f3bcdp-1, 0x1.1c73b39ae68c8p-1, 0x1.87de2a6aea963p-2, - 0x1.8f8b83c69a60bp-3, 0x0.0000000000000p+0, -0x1.8f8b83c69a60bp-3, - -0x1.87de2a6aea963p-2, -0x1.1c73b39ae68c8p-1, -0x1.6a09e667f3bcdp-1, - -0x1.a9b66290ea1a3p-1, -0x1.d906bcf328d46p-1, -0x1.f6297cff75cb0p-1, - -0x1.0000000000000p+0, -0x1.f6297cff75cb0p-1, -0x1.d906bcf328d46p-1, - -0x1.a9b66290ea1a3p-1, -0x1.6a09e667f3bcdp-1, -0x1.1c73b39ae68c8p-1, - -0x1.87de2a6aea963p-2, -0x1.8f8b83c69a60bp-3}; - } // namespace __llvm_libc diff --git a/libc/src/math/generic/cosf.cpp b/libc/src/math/generic/cosf.cpp --- a/libc/src/math/generic/cosf.cpp +++ b/libc/src/math/generic/cosf.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/math/cosf.h" -#include "common_constants.h" +#include "sincosf_utils.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" @@ -18,45 +18,8 @@ #include -#if defined(LIBC_TARGET_HAS_FMA) -#include "range_reduction_fma.h" -// using namespace __llvm_libc::fma; -using __llvm_libc::fma::FAST_PASS_BOUND; -using __llvm_libc::fma::large_range_reduction; -using __llvm_libc::fma::small_range_reduction; -#else -#include "range_reduction.h" -// using namespace __llvm_libc::generic; -using __llvm_libc::generic::FAST_PASS_BOUND; -using __llvm_libc::generic::large_range_reduction; -using __llvm_libc::generic::small_range_reduction; -#endif - namespace __llvm_libc { -// Exceptional cases for cosf. -static constexpr int COSF_EXCEPTS = 6; - -static constexpr fputil::ExceptionalValues CosfExcepts{ - /* inputs */ { - 0x55325019, // x = 0x1.64a032p43 - 0x5922aa80, // x = 0x1.4555p51 - 0x5aa4542c, // x = 0x1.48a858p54 - 0x5f18b878, // x = 0x1.3170fp63 - 0x6115cb11, // x = 0x1.2b9622p67 - 0x7beef5ef, // x = 0x1.ddebdep120 - }, - /* outputs (RZ, RU offset, RD offset, RN offset) */ - { - {0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ) - {0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ) - {0x3efa40a4, 1, 0, 0}, // x = 0x1.48a858p54, cos(x) = 0x1.f48148p-2 (RZ) - {0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ) - {0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ) - {0x3f08a21c, 1, 0, - 0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ) - }}; - LLVM_LIBC_FUNCTION(float, cosf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); @@ -132,58 +95,26 @@ return result; } - // TODO(lntue): refactor range reduction and most of polynomial approximation - // to share between sinf, cosf, and sincosf. - int k; - double y; - - if (likely(x_abs < FAST_PASS_BOUND)) { - k = small_range_reduction(xd, y); - } else { - // x is inf or nan. - if (unlikely(x_abs >= 0x7f80'0000U)) { - if (x_abs == 0x7f80'0000U) { - errno = EDOM; - fputil::set_except(FE_INVALID); - } - return x + - FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); + // x is inf or nan. + if (unlikely(x_abs >= 0x7f80'0000U)) { + if (x_abs == 0x7f80'0000U) { + errno = EDOM; + fputil::set_except(FE_INVALID); } - - k = large_range_reduction(xd, xbits.get_exponent(), y); + return x + + FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); } - // After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k. - // So k is an integer and -0.5 <= y <= 0.5. - // Then cos(x) = cos((k + y)*pi/16) - // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16) - - double ysq = y * y; - - // Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya - // with: - // > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]); - double sin_y = - y * fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10, - 0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29); - // Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with: - // > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]); - // Note that cosm1_y = cos(y*pi/16) - 1. - double cosm1_y = - ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14, - -0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35); - - double sin_k = -SIN_K_PI_OVER_16[k & 31]; - // cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16). - // cos_k = y * cos(k * pi/16) - double cos_k = SIN_K_PI_OVER_16[(k + 8) & 31]; - // Combine the results with the sine of sum formula: // cos(x) = cos((k + y)*pi/16) // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16) // = cosm1_y * cos_k + sin_y * sin_k // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k - return fputil::multiply_add(sin_y, sin_k, + double sin_k, cos_k, sin_y, cosm1_y; + + sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); + + return fputil::multiply_add(sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k)); } diff --git a/libc/src/math/generic/sincosf.cpp b/libc/src/math/generic/sincosf.cpp --- a/libc/src/math/generic/sincosf.cpp +++ b/libc/src/math/generic/sincosf.cpp @@ -7,71 +7,213 @@ //===----------------------------------------------------------------------===// #include "src/math/sincosf.h" -#include "math_utils.h" #include "sincosf_utils.h" - +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" -#include -#include +#include namespace __llvm_libc { +// Exceptional values +static constexpr int N_EXCEPTS = 10; + +static constexpr uint32_t EXCEPT_INPUTS[N_EXCEPTS] = { + 0x3b5637f5, // x = 0x1.ac6feap-9 + 0x3fa7832a, // x = 0x1.4f0654p0 + 0x46199998, // x = 0x1.33333p13 + 0x55325019, // x = 0x1.64a032p43 + 0x55cafb2a, // x = 0x1.95f654p44 + 0x5922aa80, // x = 0x1.4555p51 + 0x5aa4542c, // x = 0x1.48a858p54 + 0x5f18b878, // x = 0x1.3170fp63 + 0x6115cb11, // x = 0x1.2b9622p67 + 0x7beef5ef, // x = 0x1.ddebdep120 +}; + +static constexpr uint32_t EXCEPT_OUTPUTS_SIN[N_EXCEPTS][4] = { + {0x3b5637dc, 1, 0, 0}, // x = 0x1.ac6feap-9, sin(x) = 0x1.ac6fb8p-9 (RZ) + {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ) + {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ) + {0xbf171adf, 0, 1, 1}, // x = 0x1.64a032p43, sin(x) = -0x1.2e35bep-1 (RZ) + {0xbf7e7a16, 0, 1, 1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ) + {0xbf587521, 0, 1, 1}, // x = 0x1.4555p51, sin(x) = -0x1.b0ea42p-1 (RZ) + {0x3f5f5646, 1, 0, 0}, // x = 0x1.48a858p54, sin(x) = 0x1.beac8cp-1 (RZ) + {0x3dad60f6, 1, 0, 1}, // x = 0x1.3170fp63, sin(x) = 0x1.5ac1ecp-4 (RZ) + {0xbe7cc1e0, 0, 1, 1}, // x = 0x1.2b9622p67, sin(x) = -0x1.f983cp-3 (RZ) + {0xbf587d1b, 0, 1, 1}, // x = 0x1.ddebdep120, sin(x) = -0x1.b0fa36p-1 (RZ) +}; + +static constexpr uint32_t EXCEPT_OUTPUTS_COS[N_EXCEPTS][4] = { + {0x3f7fffa6, 1, 0, 0}, // x = 0x1.ac6feap-9, cos(x) = 0x1.ffff4cp-1 (RZ) + {0x3e84aabf, 1, 0, 1}, // x = 0x1.4f0654p0, cos(x) = 0x1.09557ep-2 (RZ) + {0xbf70090b, 0, 1, 0}, // x = 0x1.33333p13, cos(x) = -0x1.e01216p-1 (RZ) + {0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ) + {0x3ddf11f3, 1, 0, 1}, // x = 0x1.95f654p44, cos(x) = 0x1.be23e6p-4 (RZ) + {0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ) + {0x3efa40a4, 1, 0, 0}, // x = 0x1.48a858p54, cos(x) = 0x1.f48148p-2 (RZ) + {0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ) + {0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ) + {0x3f08a21c, 1, 0, 0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ) +}; + // Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative // error is 0.5303 * 2^-23. A single-step range reduction is used for // small values. Large inputs have their range reduced using fast integer // arithmetic. -LLVM_LIBC_FUNCTION(void, sincosf, (float y, float *sinp, float *cosp)) { - double x = y; - double s; - int n; - const sincos_t *p = &SINCOSF_TABLE[0]; - - if (abstop12(y) < abstop12(PIO4)) { - double x2 = x * x; - - if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) { - if (unlikely(abstop12(y) < abstop12(as_float(0x800000)))) - // Force underflow for tiny y. - force_eval(x2); - *sinp = y; +LLVM_LIBC_FUNCTION(void, sincosf, (float x, float *sinp, float *cosp)) { + using FPBits = typename fputil::FPBits; + FPBits xbits(x); + + uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU; + double xd = static_cast(x); + + // Range reduction: + // For |x| > pi/16, we perform range reduction as follows: + // Find k and y such that: + // x = (k + y) * pi/16 + // k is an integer + // |y| < 0.5 + // For small range (|x| < 2^46 when FMA instructions are available, 2^22 + // otherwise), this is done by performing: + // k = round(x * 16/pi) + // y = x * 16/pi - k + // For large range, we will omit all the higher parts of 16/pi such that the + // least significant bits of their full products with x are larger than 31, + // since: + // sin((k + y + 32*i) * pi/16) = sin(x + i * 2pi) = sin(x), and + // cos((k + y + 32*i) * pi/16) = cos(x + i * 2pi) = cos(x). + // + // When FMA instructions are not available, we store the digits of 16/pi in + // chunks of 28-bit precision. This will make sure that the products: + // x * SIXTEEN_OVER_PI_28[i] are all exact. + // When FMA instructions are available, we simply store the digits of 16/pi in + // chunks of doubles (53-bit of precision). + // So when multiplying by the largest values of single precision, the + // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the + // worst-case analysis of range reduction, |y| >= 2^-38, so this should give + // us more than 40 bits of accuracy. For the worst-case estimation of range + // reduction, see for instances: + // Elementary Functions by J-M. Muller, Chapter 11, + // Handbook of Floating-Point Arithmetic by J-M. Muller et. al., + // Chapter 10.2. + // + // Once k and y are computed, we then deduce the answer by the sine and cosine + // of sum formulas: + // sin(x) = sin((k + y)*pi/16) + // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16) + // cos(x) = cos((k + y)*pi/16) + // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16) + // The values of sin(k*pi/16) and cos(k*pi/16) for k = 0..31 are precomputed + // and stored using a vector of 32 doubles. Sin(y*pi/16) and cos(y*pi/16) are + // computed using degree-7 and degree-8 minimax polynomials generated by + // Sollya respectively. + + // |x| < 0x1.0p-12f + if (unlikely(x_abs < 0x3980'0000U)) { + if (unlikely(x_abs == 0U)) { + // For signed zeros. + *sinp = x; *cosp = 1.0f; return; } + // When |x| < 2^-12, the relative errors of the approximations + // sin(x) ~ x, cos(x) ~ 1 + // are: + // |sin(x) - x| / |sin(x)| < |x^3| / (6|x|) + // = x^2 / 6 + // < 2^-25 + // < epsilon(1)/2. + // |cos(x) - 1| < |x^2 / 2| = 2^-25 < epsilon(1)/2. + // So the correctly rounded values of sin(x) and cos(x) are: + // sin(x) = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO, + // or (rounding mode = FE_UPWARD and x is + // negative), + // = x otherwise. + // cos(x) = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD, + // = 1 otherwise. + // To simplify the rounding decision and make it more efficient and to + // prevent compiler to perform constant folding, we use + // sin(x) = fma(x, -2^-25, x), + // cos(x) = fma(x*0.5f, -x, 1) + // instead. + // Note: to use the formula x - 2^-25*x to decide the correct rounding, we + // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when + // |x| < 2^-125. For targets without FMA instructions, we simply use + // double for intermediate results as it is more efficient than using an + // emulated version of FMA. +#if defined(LIBC_TARGET_HAS_FMA) + *sinp = fputil::multiply_add(x, -0x1.0p-25f, x); + *cosp = fputil::multiply_add(FPBits(x_abs).get_val(), -0x1.0p-25f, 1.0f); +#else + *sinp = static_cast(fputil::multiply_add(xd, -0x1.0p-25, xd)); + *cosp = static_cast(fputil::multiply_add( + static_cast(FPBits(x_abs).get_val()), -0x1.0p-25, 1.0)); +#endif // LIBC_TARGET_HAS_FMA + return; + } - sincosf_poly(x, x2, p, 0, sinp, cosp); - } else if (abstop12(y) < abstop12(120.0f)) { - x = reduce_fast(x, p, &n); - - // Setup the signs for sin and cos. - s = p->sign[n & 3]; - - if (n & 2) - p = &SINCOSF_TABLE[1]; - - sincosf_poly(x * s, x * x, p, n, sinp, cosp); - } else if (likely(abstop12(y) < abstop12(INFINITY))) { - uint32_t xi = as_uint32_bits(y); - int sign = xi >> 31; - - x = reduce_large(xi, &n); - - // Setup signs for sin and cos - include original sign. - s = p->sign[(n + sign) & 3]; - - if ((n + sign) & 2) - p = &SINCOSF_TABLE[1]; + // x is inf or nan. + if (unlikely(x_abs >= 0x7f80'0000U)) { + if (x_abs == 0x7f80'0000U) { + errno = EDOM; + fputil::set_except(FE_INVALID); + } + *sinp = + x + FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); + *cosp = *sinp; + return; + } - sincosf_poly(x * s, x * x, p, n, sinp, cosp); - } else { - // Return NaN if Inf or NaN for both sin and cos. - *sinp = *cosp = y - y; + // Check exceptional values. + for (int i = 0; i < N_EXCEPTS; ++i) { + if (unlikely(x_abs == EXCEPT_INPUTS[i])) { + uint32_t s = EXCEPT_OUTPUTS_SIN[i][0]; // FE_TOWARDZERO + uint32_t c = EXCEPT_OUTPUTS_COS[i][0]; // FE_TOWARDZERO + bool x_sign = x < 0; + switch (fputil::get_round()) { + case FE_UPWARD: + s += x_sign ? EXCEPT_OUTPUTS_SIN[i][2] : EXCEPT_OUTPUTS_SIN[i][1]; + c += EXCEPT_OUTPUTS_COS[i][1]; + break; + case FE_DOWNWARD: + s += x_sign ? EXCEPT_OUTPUTS_SIN[i][1] : EXCEPT_OUTPUTS_SIN[i][2]; + c += EXCEPT_OUTPUTS_COS[i][2]; + break; + case FE_TONEAREST: + s += EXCEPT_OUTPUTS_SIN[i][3]; + c += EXCEPT_OUTPUTS_COS[i][3]; + break; + } + *sinp = x_sign ? -FPBits(s).get_val() : FPBits(s).get_val(); + *cosp = FPBits(c).get_val(); - // Needed to set errno for +-Inf, the add is a hack to work - // around a gcc register allocation issue: just passing y - // affects code generation in the fast path. - invalid(y + y); + return; + } } + + // Combine the results with the sine and cosine of sum formulas: + // sin(x) = sin((k + y)*pi/16) + // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16) + // = sin_y * cos_k + (1 + cosm1_y) * sin_k + // = sin_y * cos_k + (cosm1_y * sin_k + sin_k) + // cos(x) = cos((k + y)*pi/16) + // = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16) + // = cosm1_y * cos_k + sin_y * sin_k + // = (cosm1_y * cos_k + cos_k) + sin_y * sin_k + double sin_k, cos_k, sin_y, cosm1_y; + + sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); + + *sinp = fputil::multiply_add(sin_y, cos_k, + fputil::multiply_add(cosm1_y, sin_k, sin_k)); + *cosp = fputil::multiply_add(sin_y, -sin_k, + fputil::multiply_add(cosm1_y, cos_k, cos_k)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sincosf_data.cpp b/libc/src/math/generic/sincosf_data.cpp deleted file mode 100644 --- a/libc/src/math/generic/sincosf_data.cpp +++ /dev/null @@ -1,51 +0,0 @@ -//===-- sinf/cosf data tables ---------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "math_utils.h" -#include "sincosf_utils.h" - -#include - -namespace __llvm_libc { - -// The constants and polynomials for sine and cosine. The 2nd entry -// computes -cos (x) rather than cos (x) to get negation for free. -constexpr sincos_t SINCOSF_TABLE[2] = { - {{1.0, -1.0, -1.0, 1.0}, - 0x1.45f306dc9c883p+23, - 0x1.921fb54442d18p+0, - 0x1p+0, - -0x1.ffffffd0c621cp-2, - 0x1.55553e1068f19p-5, - -0x1.6c087e89a359dp-10, - 0x1.99343027bf8c3p-16, - -0x1.555545995a603p-3, - 0x1.1107605230bc4p-7, - -0x1.994eb3774cf24p-13}, - {{1.0, -1.0, -1.0, 1.0}, - 0x1.45f306dc9c883p+23, - 0x1.921fb54442d18p+0, - -0x1p+0, - 0x1.ffffffd0c621cp-2, - -0x1.55553e1068f19p-5, - 0x1.6c087e89a359dp-10, - -0x1.99343027bf8c3p-16, - -0x1.555545995a603p-3, - 0x1.1107605230bc4p-7, - -0x1.994eb3774cf24p-13}, -}; - -// Table with 4/PI to 192 bit precision. To avoid unaligned accesses -// only 8 new bits are added per entry, making the table 4 times larger. -constexpr uint32_t INV_PIO4[24] = { - 0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44, - 0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1, - 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62, - 0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041}; - -} // namespace __llvm_libc diff --git a/libc/src/math/generic/sincosf_utils.h b/libc/src/math/generic/sincosf_utils.h --- a/libc/src/math/generic/sincosf_utils.h +++ b/libc/src/math/generic/sincosf_utils.h @@ -1,4 +1,4 @@ -//===-- Collection of utils for cosf/sinf/sincosf ---------------*- C++ -*-===// +//===-- Collection of utils for sinf/cosf/sincosf ---------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -9,132 +9,102 @@ #ifndef LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H #define LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H -#include "math_utils.h" - -#include +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/common.h" + +#if defined(LIBC_TARGET_HAS_FMA) +#include "range_reduction_fma.h" +// using namespace __llvm_libc::fma; +using __llvm_libc::fma::FAST_PASS_BOUND; +using __llvm_libc::fma::large_range_reduction; +using __llvm_libc::fma::small_range_reduction; +#else +#include "range_reduction.h" +// using namespace __llvm_libc::generic; +using __llvm_libc::generic::FAST_PASS_BOUND; +using __llvm_libc::generic::large_range_reduction; +using __llvm_libc::generic::small_range_reduction; +#endif // LIBC_TARGET_HAS_FMA namespace __llvm_libc { -// 2PI * 2^-64. -static constexpr double PI63 = 0x1.921fb54442d18p-62; -// PI / 4. -static constexpr double PIO4 = 0x1.921fb54442d18p-1; - -// The constants and polynomials for sine and cosine. -typedef struct { - double sign[4]; // Sign of sine in quadrants 0..3. - double hpi_inv; // 2 / PI ( * 2^24 ). - double hpi; // PI / 2. - double c0, c1, c2, c3, c4; // Cosine polynomial. - double s1, s2, s3; // Sine polynomial. -} sincos_t; - -// Polynomial data (the cosine polynomial is negated in the 2nd entry). -extern const sincos_t SINCOSF_TABLE[2]; - -// Table with 4/PI to 192 bit precision. -extern const uint32_t INV_PIO4[]; - -// Top 12 bits of the float representation with the sign bit cleared. -static inline uint32_t abstop12(float x) { - return (as_uint32_bits(x) >> 20) & 0x7ff; -} - -// Compute the sine and cosine of inputs X and X2 (X squared), using the -// polynomial P and store the results in SINP and COSP. N is the quadrant, -// if odd the cosine and sine polynomials are swapped. -static inline void sincosf_poly(double x, double x2, const sincos_t *p, int n, - float *sinp, float *cosp) { - double x3, x4, x5, x6, s, c, c1, c2, s1; - - x4 = x2 * x2; - x3 = x2 * x; - c2 = p->c3 + x2 * p->c4; - s1 = p->s2 + x2 * p->s3; - - // Swap sin/cos result based on quadrant. - float *tmp = (n & 1 ? cosp : sinp); - cosp = (n & 1 ? sinp : cosp); - sinp = tmp; - - c1 = p->c0 + x2 * p->c1; - x5 = x3 * x2; - x6 = x4 * x2; - - s = x + x3 * p->s1; - c = c1 + x4 * p->c2; - - *sinp = s + x5 * s1; - *cosp = c + x6 * c2; -} - -// Return the sine of inputs X and X2 (X squared) using the polynomial P. -// N is the quadrant, and if odd the cosine polynomial is used. -static inline float sinf_poly(double x, double x2, const sincos_t *p, int n) { - double x3, x4, x6, x7, s, c, c1, c2, s1; - - if ((n & 1) == 0) { - x3 = x * x2; - s1 = p->s2 + x2 * p->s3; - - x7 = x3 * x2; - s = x + x3 * p->s1; - - return s + x7 * s1; +// Exceptional cases for cosf. +static constexpr int COSF_EXCEPTS = 6; + +static constexpr fputil::ExceptionalValues CosfExcepts{ + /* inputs */ { + 0x55325019, // x = 0x1.64a032p43 + 0x5922aa80, // x = 0x1.4555p51 + 0x5aa4542c, // x = 0x1.48a858p54 + 0x5f18b878, // x = 0x1.3170fp63 + 0x6115cb11, // x = 0x1.2b9622p67 + 0x7beef5ef, // x = 0x1.ddebdep120 + }, + /* outputs (RZ, RU offset, RD offset, RN offset) */ + { + {0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ) + {0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ) + {0x3efa40a4, 1, 0, 0}, // x = 0x1.48a858p54, cos(x) = 0x1.f48148p-2 (RZ) + {0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ) + {0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ) + {0x3f08a21c, 1, 0, + 0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ) + }}; + +// Lookup table for sin(k * pi / 16) with k = 0, ..., 31. +// Table is generated with Sollya as follow: +// > display = hexadecimal; +// > for k from 0 to 31 do { D(sin(k * pi/16)); }; +const double SIN_K_PI_OVER_16[32] = { + 0x0.0000000000000p+0, 0x1.8f8b83c69a60bp-3, 0x1.87de2a6aea963p-2, + 0x1.1c73b39ae68c8p-1, 0x1.6a09e667f3bcdp-1, 0x1.a9b66290ea1a3p-1, + 0x1.d906bcf328d46p-1, 0x1.f6297cff75cb0p-1, 0x1.0000000000000p+0, + 0x1.f6297cff75cb0p-1, 0x1.d906bcf328d46p-1, 0x1.a9b66290ea1a3p-1, + 0x1.6a09e667f3bcdp-1, 0x1.1c73b39ae68c8p-1, 0x1.87de2a6aea963p-2, + 0x1.8f8b83c69a60bp-3, 0x0.0000000000000p+0, -0x1.8f8b83c69a60bp-3, + -0x1.87de2a6aea963p-2, -0x1.1c73b39ae68c8p-1, -0x1.6a09e667f3bcdp-1, + -0x1.a9b66290ea1a3p-1, -0x1.d906bcf328d46p-1, -0x1.f6297cff75cb0p-1, + -0x1.0000000000000p+0, -0x1.f6297cff75cb0p-1, -0x1.d906bcf328d46p-1, + -0x1.a9b66290ea1a3p-1, -0x1.6a09e667f3bcdp-1, -0x1.1c73b39ae68c8p-1, + -0x1.87de2a6aea963p-2, -0x1.8f8b83c69a60bp-3}; + +static inline void sincosf_eval(double xd, uint32_t x_abs, double &sin_k, + double &cos_k, double &sin_y, double &cosm1_y) { + int64_t k; + double y; + + if (likely(x_abs < FAST_PASS_BOUND)) { + k = small_range_reduction(xd, y); } else { - x4 = x2 * x2; - c2 = p->c3 + x2 * p->c4; - c1 = p->c0 + x2 * p->c1; - - x6 = x4 * x2; - c = c1 + x4 * p->c2; - - return c + x6 * c2; + fputil::FPBits x_bits(x_abs); + k = large_range_reduction(xd, x_bits.get_exponent(), y); } -} - -// Fast range reduction using single multiply-subtract. Return the modulo of -// X as a value between -PI/4 and PI/4 and store the quadrant in NP. -// The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double -// is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, -// the result is accurate for |X| <= 120.0. -static inline double reduce_fast(double x, const sincos_t *p, int *np) { - double r; - // Use scaled float to int conversion with explicit rounding. - // hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. - // This avoids inaccuracies introduced by truncating negative values. - r = x * p->hpi_inv; - int n = ((int32_t)r + 0x800000) >> 24; - *np = n; - return x - n * p->hpi; -} - -// Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. -// XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). -// Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. -// Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit -// multiply computes the exact 2.62-bit fixed-point modulo. Since the result -// can have at most 29 leading zeros after the binary point, the double -// precision result is accurate to 33 bits. -static inline double reduce_large(uint32_t xi, int *np) { - const uint32_t *arr = &INV_PIO4[(xi >> 26) & 15]; - int shift = (xi >> 23) & 7; - uint64_t n, res0, res1, res2; - - xi = (xi & 0xffffff) | 0x800000; - xi <<= shift; - - res0 = xi * arr[0]; - res1 = (uint64_t)xi * arr[4]; - res2 = (uint64_t)xi * arr[8]; - res0 = (res2 >> 32) | (res0 << 32); - res0 += res1; - n = (res0 + (1ULL << 61)) >> 62; - res0 -= n << 62; - double x = (int64_t)res0; - *np = n; - return x * PI63; + // After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k. + // So k is an integer and -0.5 <= y <= 0.5. + // Then sin(x) = sin((k + y)*pi/16) + // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16) + + sin_k = SIN_K_PI_OVER_16[k & 31]; + // cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16). + // cos_k = y * cos(k * pi/16) + cos_k = SIN_K_PI_OVER_16[(k + 8) & 31]; + + double ysq = y * y; + + // Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya + // with: + // > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]); + sin_y = y * fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10, + 0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29); + // Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with: + // > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]); + // Note that cosm1_y = cos(y*pi/16) - 1. + cosm1_y = + ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14, + -0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp --- a/libc/src/math/generic/sinf.cpp +++ b/libc/src/math/generic/sinf.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/math/sinf.h" -#include "common_constants.h" +#include "sincosf_utils.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" @@ -21,19 +21,13 @@ #if defined(LIBC_TARGET_HAS_FMA) #include "range_reduction_fma.h" // using namespace __llvm_libc::fma; -using __llvm_libc::fma::FAST_PASS_BOUND; -using __llvm_libc::fma::large_range_reduction; using __llvm_libc::fma::N_EXCEPTS; using __llvm_libc::fma::SinfExcepts; -using __llvm_libc::fma::small_range_reduction; #else #include "range_reduction.h" // using namespace __llvm_libc::generic; -using __llvm_libc::generic::FAST_PASS_BOUND; -using __llvm_libc::generic::large_range_reduction; using __llvm_libc::generic::N_EXCEPTS; using __llvm_libc::generic::SinfExcepts; -using __llvm_libc::generic::small_range_reduction; #endif namespace __llvm_libc { @@ -143,55 +137,24 @@ return result; } - int k; - double y; - - if (likely(x_abs < FAST_PASS_BOUND)) { - k = small_range_reduction(xd, y); - } else { - // x is inf or nan. - if (unlikely(x_abs >= 0x7f80'0000U)) { - if (x_abs == 0x7f80'0000U) { - errno = EDOM; - fputil::set_except(FE_INVALID); - } - return x + - FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); + if (unlikely(x_abs >= 0x7f80'0000U)) { + if (x_abs == 0x7f80'0000U) { + errno = EDOM; + fputil::set_except(FE_INVALID); } - - k = large_range_reduction(xd, xbits.get_exponent(), y); + return x + + FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); } - // After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k. - // So k is an integer and -0.5 <= y <= 0.5. - // Then sin(x) = sin((k + y)*pi/16) - // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16) - - double ysq = y * y; - - // Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya - // with: - // > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]); - double sin_y = - fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10, - 0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29); - // Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with: - // > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]); - // Note that cosm1_y = cos(y*pi/16) - 1. - double cosm1_y = - ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14, - -0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35); - - double sin_k = SIN_K_PI_OVER_16[k & 31]; - // cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16). - // cos_k = y * cos(k * pi/16) - double cos_k = y * SIN_K_PI_OVER_16[(k + 8) & 31]; - // Combine the results with the sine of sum formula: // sin(x) = sin((k + y)*pi/16) // = sin(y*pi/16) * cos(k*pi/16) + cos(y*pi/16) * sin(k*pi/16) // = sin_y * cos_k + (1 + cosm1_y) * sin_k // = sin_y * cos_k + (cosm1_y * sin_k + sin_k) + double sin_k, cos_k, sin_y, cosm1_y; + + sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); + return fputil::multiply_add(sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k)); } diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -55,6 +55,23 @@ -lpthread ) +add_fp_unittest( + sincosf_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + sincosf_test.cpp + DEPENDS + .exhaustive_test + libc.include.math + libc.src.math.sincosf + libc.src.__support.FPUtil.fputil + LINK_LIBRARIES + -lpthread +) + add_fp_unittest( expf_test NO_RUN_POSTBUILD diff --git a/libc/test/src/math/exhaustive/sincosf_test.cpp b/libc/test/src/math/exhaustive/sincosf_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/math/exhaustive/sincosf_test.cpp @@ -0,0 +1,77 @@ +//===-- Exhaustive test for sincosf ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exhaustive_test.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/sincosf.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/FPMatcher.h" + +#include + +using FPBits = __llvm_libc::fputil::FPBits; + +namespace mpfr = __llvm_libc::testing::mpfr; + +struct LlvmLibcSinCosfExhaustiveTest : public LlvmLibcExhaustiveTest { + bool check(uint32_t start, uint32_t stop, + mpfr::RoundingMode rounding) override { + mpfr::ForceRoundingMode r(rounding); + uint32_t bits = start; + bool result = true; + do { + FPBits xbits(bits); + float x = float(xbits); + float sinx, cosx; + __llvm_libc::sincosf(x, &sinx, &cosx); + result &= EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, sinx, 0.5, rounding); + result &= EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, cosx, 0.5, rounding); + } while (++bits < stop); + return result; + } +}; + +// Range: [0, +Inf); +static constexpr uint32_t POS_START = 0x0000'0000U; +static constexpr uint32_t POS_STOP = 0x7f80'0000U; + +TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundNearestTieToEven) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest); +} + +TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundUp) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward); +} + +TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundDown) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward); +} + +TEST_F(LlvmLibcSinCosfExhaustiveTest, PostiveRangeRoundTowardZero) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero); +} + +// Range: (-Inf, 0]; +static constexpr uint32_t NEG_START = 0x8000'0000U; +static constexpr uint32_t NEG_STOP = 0xff80'0000U; + +TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundNearestTieToEven) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest); +} + +TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundUp) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward); +} + +TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundDown) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward); +} + +TEST_F(LlvmLibcSinCosfExhaustiveTest, NegativeRangeRoundTowardZero) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero); +} diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp --- a/libc/test/src/math/sincosf_test.cpp +++ b/libc/test/src/math/sincosf_test.cpp @@ -54,6 +54,40 @@ EXPECT_MATH_ERRNO(EDOM); } +#define EXPECT_SINCOS_MATCH_ALL_ROUNDING(input) \ + { \ + float sin, cos; \ + namespace mpfr = __llvm_libc::testing::mpfr; \ + \ + mpfr::ForceRoundingMode __r1(mpfr::RoundingMode::Nearest); \ + __llvm_libc::sincosf(input, &sin, &cos); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \ + mpfr::RoundingMode::Nearest); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \ + mpfr::RoundingMode::Nearest); \ + \ + mpfr::ForceRoundingMode __r2(mpfr::RoundingMode::Upward); \ + __llvm_libc::sincosf(input, &sin, &cos); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \ + mpfr::RoundingMode::Upward); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \ + mpfr::RoundingMode::Upward); \ + \ + mpfr::ForceRoundingMode __r3(mpfr::RoundingMode::Downward); \ + __llvm_libc::sincosf(input, &sin, &cos); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \ + mpfr::RoundingMode::Downward); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \ + mpfr::RoundingMode::Downward); \ + \ + mpfr::ForceRoundingMode __r4(mpfr::RoundingMode::TowardZero); \ + __llvm_libc::sincosf(input, &sin, &cos); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Sin, input, sin, 0.5, \ + mpfr::RoundingMode::TowardZero); \ + EXPECT_MPFR_MATCH(mpfr::Operation::Cos, input, cos, 0.5, \ + mpfr::RoundingMode::TowardZero); \ + } + TEST(LlvmLibcSinCosfTest, InFloatRange) { constexpr uint32_t COUNT = 1000000; constexpr uint32_t STEP = UINT32_MAX / COUNT; @@ -62,31 +96,65 @@ if (isnan(x) || isinf(x)) continue; - float sin, cos; - __llvm_libc::sincosf(x, &sin, &cos); - ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, cos, 1.0); - ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, sin, 1.0); + EXPECT_SINCOS_MATCH_ALL_ROUNDING(x); + EXPECT_SINCOS_MATCH_ALL_ROUNDING(-x); } } -// For small values, cos(x) is 1 and sin(x) is x. -TEST(LlvmLibcSinCosfTest, SmallValues) { - uint32_t bits = 0x17800000; - float x = float(FPBits((bits))); - float result_cos, result_sin; - __llvm_libc::sincosf(x, &result_sin, &result_cos); - EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result_cos, 1.0); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result_sin, 1.0); - EXPECT_FP_EQ(1.0f, result_cos); - EXPECT_FP_EQ(x, result_sin); - - bits = 0x00400000; - x = float(FPBits((bits))); - __llvm_libc::sincosf(x, &result_sin, &result_cos); - EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, result_cos, 1.0); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result_sin, 1.0); - EXPECT_FP_EQ(1.0f, result_cos); - EXPECT_FP_EQ(x, result_sin); +// For hard to round inputs. +TEST(LlvmLibcSinCosfTest, SpecialValues) { + constexpr int N = 43; + constexpr uint32_t INPUTS[N] = { + 0x3b56'37f5U, // x = 0x1.ac6feap-9f + 0x3f06'0a92U, // x = pi/6 + 0x3f3a'dc51U, // x = 0x1.75b8a2p-1f + 0x3f49'0fdbU, // x = pi/4 + 0x3f86'0a92U, // x = pi/3 + 0x3fa7'832aU, // x = 0x1.4f0654p+0f + 0x3fc9'0fdbU, // x = pi/2 + 0x4017'1973U, // x = 0x1.2e32e6p+1f + 0x4049'0fdbU, // x = pi + 0x4096'cbe4U, // x = 0x1.2d97c8p+2f + 0x40c9'0fdbU, // x = 2*pi + 0x433b'7490U, // x = 0x1.76e92p+7f + 0x437c'e5f1U, // x = 0x1.f9cbe2p+7f + 0x4619'9998U, // x = 0x1.33333p+13f + 0x474d'246fU, // x = 0x1.9a48dep+15f + 0x4afd'ece4U, // x = 0x1.fbd9c8p+22f + 0x4c23'32e9U, // x = 0x1.4665d2p+25f + 0x50a3'e87fU, // x = 0x1.47d0fep+34f + 0x5239'47f6U, // x = 0x1.728fecp+37f + 0x53b1'46a6U, // x = 0x1.628d4cp+40f + 0x5532'5019U, // x = 0x1.64a032p+43f + 0x55ca'fb2aU, // x = 0x1.95f654p+44f + 0x588e'f060U, // x = 0x1.1de0cp+50f + 0x5922'aa80U, // x = 0x1.4555p+51f + 0x5aa4'542cU, // x = 0x1.48a858p+54f + 0x5c07'bcd0U, // x = 0x1.0f79ap+57f + 0x5ebc'fddeU, // x = 0x1.79fbbcp+62f + 0x5f18'b878U, // x = 0x1.3170fp+63f + 0x5fa6'eba7U, // x = 0x1.4dd74ep+64f + 0x6115'cb11U, // x = 0x1.2b9622p+67f + 0x61a4'0b40U, // x = 0x1.48168p+68f + 0x6386'134eU, // x = 0x1.0c269cp+72f + 0x6589'8498U, // x = 0x1.13093p+76f + 0x6600'0001U, // x = 0x1.000002p+77f + 0x664e'46e4U, // x = 0x1.9c8dc8p+77f + 0x66b0'14aaU, // x = 0x1.602954p+78f + 0x67a9'242bU, // x = 0x1.524856p+80f + 0x6a19'76f1U, // x = 0x1.32ede2p+85f + 0x6c55'da58U, // x = 0x1.abb4bp+89f + 0x6f79'be45U, // x = 0x1.f37c8ap+95f + 0x7276'69d4U, // x = 0x1.ecd3a8p+101f + 0x7758'4625U, // x = 0x1.b08c4ap+111f + 0x7bee'f5efU, // x = 0x1.ddebdep+120f + }; + + for (int i = 0; i < N; ++i) { + float x = float(FPBits(INPUTS[i])); + EXPECT_SINCOS_MATCH_ALL_ROUNDING(x); + EXPECT_SINCOS_MATCH_ALL_ROUNDING(-x); + } } // SDCOMP-26094: check sinf in the cases for which the range reducer @@ -94,9 +162,7 @@ TEST(LlvmLibcSinCosfTest, SDCOMP_26094) { for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits((v))); - float sin, cos; - __llvm_libc::sincosf(x, &sin, &cos); - EXPECT_MPFR_MATCH(mpfr::Operation::Cos, x, cos, 1.0); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, sin, 1.0); + EXPECT_SINCOS_MATCH_ALL_ROUNDING(x); + EXPECT_SINCOS_MATCH_ALL_ROUNDING(-x); } } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -442,16 +442,6 @@ ], ) -cc_library( - name = "sincosf_utils", - srcs = ["src/math/generic/sincosf_data.cpp"], - hdrs = ["src/math/generic/sincosf_utils.h"], - deps = [ - ":libc_root", - ":math_utils", - ], -) - cc_library( name = "common_constants", srcs = ["src/math/generic/common_constants.cpp"], @@ -476,6 +466,17 @@ ], ) +cc_library( + name = "sincosf_utils", + hdrs = ["src/math/generic/sincosf_utils.h"], + deps = [ + ":__support_fputil", + ":__support_fputil_polyeval", + ":range_reduction", + ":libc_root", + ], +) + libc_math_function( name = "expm1f", additional_deps = [ @@ -639,15 +640,16 @@ ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":common_constants", - ":range_reduction", + ":sincosf_utils", ], ) libc_math_function( name = "sincosf", additional_deps = [ - ":math_utils", + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_polyeval", ":sincosf_utils", ], ) @@ -658,8 +660,8 @@ ":__support_fputil_fma", ":__support_fputil_multiply_add", ":__support_fputil_polyeval", - ":common_constants", ":range_reduction", + ":sincosf_utils", ], )