diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake --- a/libc/cmake/modules/LLVMLibCObjectRules.cmake +++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake @@ -504,7 +504,7 @@ list(SORT flags) if(SHOW_INTERMEDIATE_OBJECTS AND flags) - message(STATUS "Object library ${fq_target_name} has FLAGS: ${flags}") + message(STATUS "Entrypoint object ${fq_target_name} has FLAGS: ${flags}") endif() if(NOT ADD_TO_EXPAND_NAME) diff --git a/libc/docs/math.rst b/libc/docs/math.rst --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -164,7 +164,7 @@ log10 |check| log1p |check| log2 |check| -sin 0.561 ULPs large +sin |check| large sincos 0.776 ULPs large sqrt |check| |check| |check| ============== ================ =============== ====================== @@ -205,13 +205,13 @@ +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | expm1f | 14 | 53 | 59 | 146 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| fmodf (n) | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| fmodf | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ +| | 9 | 11 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| fmodf (d) | 9 | 11 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| fmod (n) | 595 | 3297 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | -+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| fmod (d) | 14 | 13 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| fmod | 595 | 3297 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ +| | 14 | 13 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | hypotf | 25 | 15 | 64 | 49 | :math:`[-10, 10] \times [-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ @@ -223,7 +223,7 @@ +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | log2f | 13 | 10 | 57 | 46 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| sinf | 36 | 31 | 72 | 71 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +| sinf | 14 | 26 | 65 | 59 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ References diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -13,6 +13,7 @@ NormalFloat.h PlatformDefs.h builtin_wrappers.h + except_value_utils.h DEPENDS libc.include.math libc.include.errno @@ -69,4 +70,12 @@ .multiply_add ) +add_header_library( + nearest_integer + HDRS + nearest_integer.h + DEPENDS + libc.src.__support.common +) + add_subdirectory(generic) diff --git a/libc/src/__support/FPUtil/aarch64/nearest_integer.h b/libc/src/__support/FPUtil/aarch64/nearest_integer.h new file mode 100644 --- /dev/null +++ b/libc/src/__support/FPUtil/aarch64/nearest_integer.h @@ -0,0 +1,30 @@ +//===--- Round floating point to nearest integer on aarch64 -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_AARCH64_NEAREST_INTEGER_H +#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_AARCH64_NEAREST_INTEGER_H + +#include "src/__support/architectures.h" + +#if !defined(LLVM_LIBC_ARCH_AARCH64) +#error "Invalid include" +#endif + +namespace __llvm_libc { +namespace fputil { + +static inline double nearest_integer(double x) { + double result; + __asm__ __volatile__("frintn %d0, %d1\n\t" : "=w"(result) : "w"(x)); + return result; +} + +} // namespace fputil +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_AARCH64_NEAREST_INTEGER_H diff --git a/libc/src/__support/FPUtil/except_value_utils.h b/libc/src/__support/FPUtil/except_value_utils.h new file mode 100644 --- /dev/null +++ b/libc/src/__support/FPUtil/except_value_utils.h @@ -0,0 +1,70 @@ +//===-- Common header for helpers to set exceptional values -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H +#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H + +#include "FEnvImpl.h" +#include "FPBits.h" + +namespace __llvm_libc { + +namespace fputil { + +template struct ExceptionalValues { + using UIntType = typename FPBits::UIntType; + static constexpr int SIZE = N; + // Input bits. + UIntType inputs[SIZE]; + // Output bits contains 4 values: + // output[i][0]: output bits corresponding to FE_TOWARDZERO + // output[i][1]: offset for FE_UPWARD + // output[i][2]: offset for FE_DOWNWARD + // output[i][3]: offset for FE_TONEAREST + UIntType outputs[SIZE][4]; +}; + +template struct ExceptionChecker { + using UIntType = typename FPBits::UIntType; + using FPBits = FPBits; + using ExceptionalValues = ExceptionalValues; + + static bool check_odd_func(const ExceptionalValues &ExceptVals, + UIntType x_abs, bool sign, T &result) { + for (int i = 0; i < N; ++i) { + if (unlikely(x_abs == ExceptVals.inputs[i])) { + UIntType out_bits = ExceptVals.outputs[i][0]; // FE_TOWARDZERO + switch (fputil::get_round()) { + case FE_UPWARD: + out_bits += + sign ? ExceptVals.outputs[i][2] : ExceptVals.outputs[i][1]; + break; + case FE_DOWNWARD: + out_bits += + sign ? ExceptVals.outputs[i][1] : ExceptVals.outputs[i][2]; + break; + case FE_TONEAREST: + out_bits += ExceptVals.outputs[i][3]; + break; + } + result = FPBits(out_bits).get_val(); + if (sign) + result = -result; + + return true; + } + } + return false; + } +}; + +} // namespace fputil + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h new file mode 100644 --- /dev/null +++ b/libc/src/__support/FPUtil/nearest_integer.h @@ -0,0 +1,51 @@ +//===-- Fast rounding to nearest integer for floating point -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_NEAREST_INTEGER_H +#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_NEAREST_INTEGER_H + +#include "src/__support/architectures.h" +#include "src/__support/common.h" + +#if (defined(LLVM_LIBC_ARCH_X86_64) && defined(__SSE4_2__)) +#include "x86_64/nearest_integer.h" +#elif defined(LLVM_LIBC_ARCH_AARCH64) +#include "aarch64/nearest_integer.h" +#else + +namespace __llvm_libc { +namespace fputil { + +// This is a fast implementation for rounding to a nearest integer that, in case +// of a tie, might pick a random one among 2 closest integers when the rounding +// mode is not FE_TONEAREST. +// +// Notice that for AARCH64 and x86-64 with SSE4.2 support, we will use their +// corresponding rounding instruction instead. And in those cases, the results +// are rounded to the nearest integer, tie-to-even. +static inline double nearest_integer(double x) { + if (x < 0x1p53 && x > -0x1p53) { + double r = x < 0 ? (x - 0x1.0p52) + 0x1.0p52 : (x + 0x1.0p52) - 0x1.0p52; + double diff = x - r; + // The expression above is correct for the default rounding mode, round-to- + // nearest, tie-to-even. For other rounding modes, it might be off by 1, + // which is corrected below. + if (unlikely(diff > 0.5)) + return r + 1.0; + if (unlikely(diff < -0.5)) + return r - 1.0; + return r; + } + return x; +} + +} // namespace fputil +} // namespace __llvm_libc + +#endif +#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_NEAREST_INTEGER_H diff --git a/libc/src/__support/FPUtil/x86_64/nearest_integer.h b/libc/src/__support/FPUtil/x86_64/nearest_integer.h new file mode 100644 --- /dev/null +++ b/libc/src/__support/FPUtil/x86_64/nearest_integer.h @@ -0,0 +1,37 @@ +//===--- Round floating point to nearest integer on x86-64 ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_NEAREST_INTEGER_H +#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_NEAREST_INTEGER_H + +#include "src/__support/architectures.h" + +#if !defined(LLVM_LIBC_ARCH_X86_64) +#error "Invalid include" +#endif + +#if !defined(__SSE4_2__) +#error "SSE4.2 instruction set is not supported" +#endif + +#include + +namespace __llvm_libc { +namespace fputil { + +static inline double nearest_integer(double x) { + __m128d xmm = _mm_set_sd(x); // NOLINT + __m128d ymm = + _mm_round_sd(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT + return ymm[0]; +} + +} // namespace fputil +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_X86_64_NEAREST_INTEGER_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -76,11 +76,16 @@ sinf.cpp HDRS ../sinf.h + range_reduction.h + range_reduction_fma.h DEPENDS - .sincosf_utils libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.fma + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval COMPILE_OPTIONS -O3 ) diff --git a/libc/src/math/generic/range_reduction.h b/libc/src/math/generic/range_reduction.h new file mode 100644 --- /dev/null +++ b/libc/src/math/generic/range_reduction.h @@ -0,0 +1,131 @@ +//===-- Utilities for trigonometric functions -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H +#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H + +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" + +namespace __llvm_libc { + +namespace generic { + +static constexpr uint32_t FAST_PASS_BOUND = 0x4c80'0000U; // 2^26 + +static constexpr int N_ENTRIES = 8; + +// We choose to split bits of 1/pi into 28-bit precision pieces, so that the +// product of x * ONE_OVER_PI_28[i] is exact. +// These are generated by Sollya with: +// > a1 = D(round(1/pi, 28, RN)); a1; +// > a2 = D(round(1/pi - a1, 28, RN)); a2; +// > a3 = D(round(1/pi - a1 - a2, 28, RN)); a3; +// > a4 = D(round(1/pi - a1 - a2 - a3, 28, RN)); a4; +// ... +static constexpr double ONE_OVER_PI_28[N_ENTRIES] = { + 0x1.45f306ep-2, -0x1.b1bbeaep-33, 0x1.3f84ebp-62, -0x1.7056592p-92, + 0x1.c0db62ap-121, -0x1.4cd8778p-150, -0x1.bef806cp-179, 0x1.63abdecp-209}; + +// Exponents of the least significant bits of the corresponding entries in +// ONE_OVER_PI_28. +static constexpr int ONE_OVER_PI_28_LSB_EXP[N_ENTRIES] = { + -29, -60, -86, -119, -148, -175, -205, -235}; + +// Return (k mod 2) and y, where +// k = round(x / pi) and y = (x / pi) - k. +static inline int64_t small_range_reduction(double x, double &y) { + double prod = x * ONE_OVER_PI_28[0]; + double kd = fputil::nearest_integer(prod); + y = prod - kd; + y = fputil::multiply_add(x, ONE_OVER_PI_28[1], y); + y = fputil::multiply_add(x, ONE_OVER_PI_28[2], y); + return static_cast(kd); +} + +// Return k and y, where +// k = round(x / pi) and y = (x / pi) - k. +// For large range, there are at most 2 parts of ONE_OVER_PI_28 contributing to +// the unit binary digit (k & 1). If the least significant bit of x * the least +// significant bit of ONE_OVER_PI_28[i] > 1, we can completely ignore +// ONE_OVER_PI_28[i]. +static inline int64_t large_range_reduction(double x, int x_exp, double &y) { + int idx = 0; + y = 0; + int x_lsb_exp = x_exp - fputil::FloatProperties::MANTISSA_WIDTH; + + // Skipping the first parts of 1/pi such that: + // LSB of x * LSB of ONE_OVER_PI_28[i] > 1. + while (x_lsb_exp + ONE_OVER_PI_28_LSB_EXP[idx] > 0) + ++idx; + + double prod_hi = x * ONE_OVER_PI_28[idx]; + // Get the integral part of x * ONE_OVER_PI_28[idx] + double k_hi = fputil::nearest_integer(prod_hi); + // Get the fractional part of x * ONE_OVER_PI_28[idx] + double frac = prod_hi - k_hi; + double prod_lo = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 1], frac); + double k_lo = fputil::nearest_integer(prod_lo); + + // Now y is the fractional parts. + y = prod_lo - k_lo; + y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 2], y); + y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 3], y); + + return static_cast(k_hi + k_lo); +} + +// Exceptional cases. +static constexpr int N_EXCEPT_SMALL = 4; + +static constexpr fputil::ExceptionalValues SmallExcepts{ + /* inputs */ { + 0x3fa7832a, // x = 0x1.4f0654p0 + 0x46199998, // x = 0x1.33333p13 + 0x4afdece4, // x = 0x1.fbd9c8p22 + 0x4c2332e9, // x = 0x1.4665d2p25 + }, + /* outputs (RZ, RU offset, RD offset, RN offset) */ + { + {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ) + {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ) + {0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ) + {0xbf7fffff, 0, 1, + 1}, // x = 0x1.4665d2p25, sin(x) = -0x1.fffffep-1 (RZ) + }}; + +static constexpr int N_EXCEPT_LARGE = 5; + +static constexpr fputil::ExceptionalValues LargeExcepts{ + /* inputs */ { + 0x523947f6, // x = 0x1.728fecp37 + 0x53b146a6, // x = 0x1.628d4cp40 + 0x55cafb2a, // x = 0x1.95f654p44 + 0x6a1976f1, // x = 0x1.32ede2p85 + 0x77584625, // x = 0x1.b08c4ap111 + }, + /* outputs (RZ, RU offset, RD offset, RN offset) */ + { + {0xbf12791d, 0, 1, + 1}, // x = 0x1.728fecp37, sin(x) = -0x1.24f23ap-1 (RZ) + {0xbf7fffff, 0, 1, + 1}, // x = 0x1.628d4cp40, sin(x) = -0x1.fffffep-1 (RZ) + {0xbf7e7a16, 0, 1, + 1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ) + {0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ) + {0xbf7fffff, 0, 1, + 1}, // x = 0x1.b08c4ap111, sin(x) = -0x1.fffffep-1 (RZ) + }}; + +} // namespace generic + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h new file mode 100644 --- /dev/null +++ b/libc/src/math/generic/range_reduction_fma.h @@ -0,0 +1,129 @@ +//===-- Utilities for trigonometric functions with FMA ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H +#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H + +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/nearest_integer.h" + +namespace __llvm_libc { + +namespace fma { + +static constexpr uint32_t FAST_PASS_BOUND = 0x5880'0000U; // 2^50 + +// Digits of 1/pi, generated by Sollya with: +// > a0 = D(1/pi); +// > a1 = D(1/pi - a0); +// > a2 = D(1/pi - a0 - a1); +// > a3 = D(1/pi - a0 - a1 - a2); +static constexpr double ONE_OVER_PI[5] = { + 0x1.45f306dc9c883p-2, -0x1.6b01ec5417056p-56, -0x1.6447e493ad4cep-110, + 0x1.e21c820ff28b2p-164, -0x1.508510ea79237p-219}; + +// Return k and y, where +// k = round(x / pi) and y = (x / pi) - k. +// Assume x is non-negative. +static inline int64_t small_range_reduction(double x, double &y) { + double kd = fputil::nearest_integer(x * ONE_OVER_PI[0]); + y = fputil::fma(x, ONE_OVER_PI[0], -kd); + y = fputil::fma(x, ONE_OVER_PI[1], y); + return static_cast(kd); +} + +// Return k and y, where +// k = round(x / pi) and y = (x / pi) - k. +static inline int64_t large_range_reduction(double x, int x_exp, double &y) { + // 2^50 <= |x| < 2^104 + if (x_exp < 103) { + // - When x < 2^104, the unit bit is contained in the full exact product of + // x * ONE_OVER_PI[0]. + // - When 2^50 <= |x| < 2^55, the unit bit is contained + // in the last 8 bits of double(x * ONE_OVER_PI[0]). + // - When |x| >= 2^55, the LSB of double(x * ONE_OVER_PI[0]) is at least 2. + fputil::FPBits prod_hi(x * ONE_OVER_PI[0]); + prod_hi.bits &= (x_exp < 55) ? (~0xffULL) : (~0ULL); // |x| < 2^55 + double k_hi = fputil::nearest_integer(static_cast(prod_hi)); + double truncated_prod = fputil::fma(x, ONE_OVER_PI[0], -k_hi); + double prod_lo = fputil::fma(x, ONE_OVER_PI[1], truncated_prod); + double k_lo = fputil::nearest_integer(prod_lo); + y = fputil::fma(x, ONE_OVER_PI[1], truncated_prod - k_lo); + y = fputil::fma(x, ONE_OVER_PI[2], y); + y = fputil::fma(x, ONE_OVER_PI[3], y); + + return static_cast(k_lo); + } + + // - When x >= 2^104, the full exact product of x * ONE_OVER_PI[0] does not + // contain the unit bit, so we can ignore it completely. + // - When 2^104 <= |x| < 2^109, the unit bit is contained + // in the last 8 bits of double(x * ONE_OVER_PI[1]). + // - When |x| >= 2^109, the LSB of double(x * ONE_OVER_PI[1]) is at least 2. + fputil::FPBits prod_hi(x * ONE_OVER_PI[1]); + prod_hi.bits &= (x_exp < 109) ? (~0xffULL) : (~0ULL); // |x| < 2^55 + double k_hi = fputil::nearest_integer(static_cast(prod_hi)); + double truncated_prod = fputil::fma(x, ONE_OVER_PI[1], -k_hi); + double prod_lo = fputil::fma(x, ONE_OVER_PI[2], truncated_prod); + double k_lo = fputil::nearest_integer(prod_lo); + y = fputil::fma(x, ONE_OVER_PI[2], truncated_prod - k_lo); + y = fputil::fma(x, ONE_OVER_PI[3], y); + y = fputil::fma(x, ONE_OVER_PI[4], y); + + return static_cast(k_lo); +} + +// Exceptional cases. +static constexpr int N_EXCEPT_SMALL = 5; + +static constexpr fputil::ExceptionalValues SmallExcepts{ + /* inputs */ { + 0x3fa7832a, // x = 0x1.4f0654p0 + 0x433b7490, // x = 0x1.76e92p7 + 0x46199998, // x = 0x1.33333p13 + 0x4afdece4, // x = 0x1.fbd9c8p22 + 0x55cafb2a, // x = 0x1.95f654p44 + }, + /* outputs (RZ, RU offset, RD offset, RN offset) */ + { + {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ) + {0xbf5cce62, 0, 1, 0}, // x = 0x1.76e92p7, sin(x) = -0x1.b99cc4p-1 (RZ) + {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ) + {0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ) + {0xbf7e7a16, 0, 1, + 1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ) + }}; + +static constexpr int N_EXCEPT_LARGE = 5; + +static constexpr fputil::ExceptionalValues LargeExcepts{ + /* inputs */ { + 0x5ebcfdde, // x = 0x1.79fbbcp62 + 0x5fa6eba7, // x = 0x1.4dd74ep64 + 0x6386134e, // x = 0x1.0c269cp72 + 0x6a1976f1, // x = 0x1.32ede2p85 + 0x727669d4, // x = 0x1.ecd3a8p101 + }, + /* outputs (RZ, RU offset, RD offset, RN offset) */ + { + {0x3f50622d, 1, 0, 0}, // x = 0x1.79fbbcp62, sin(x) = 0x1.a0c45ap-1 (RZ) + {0xbe52464a, 0, 1, + 0}, // x = 0x1.4dd74ep64, sin(x) = -0x1.a48c94p-3 (RZ) + {0x3f7cb2e7, 1, 0, 0}, // x = 0x1.0c269cp72, sin(x) = 0x1.f965cep-1 (RZ) + {0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ) + {0xbf7a781d, 0, 1, + 0}, // x = 0x1.ecd3a8p101, sin(x) = -0x1.f4f038p-1 (RZ) + }}; + +} // namespace fma + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp --- a/libc/src/math/generic/sinf.cpp +++ b/libc/src/math/generic/sinf.cpp @@ -7,63 +7,194 @@ //===----------------------------------------------------------------------===// #include "src/math/sinf.h" -#include "math_utils.h" -#include "sincosf_utils.h" - +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" #include "src/__support/common.h" -#include -#include +#include + +#if defined(LIBC_TARGET_HAS_FMA) +#include "range_reduction_fma.h" +// using namespace __llvm_libc::fma; +using __llvm_libc::fma::FAST_PASS_BOUND; +using __llvm_libc::fma::large_range_reduction; +using __llvm_libc::fma::LargeExcepts; +using __llvm_libc::fma::N_EXCEPT_LARGE; +using __llvm_libc::fma::N_EXCEPT_SMALL; +using __llvm_libc::fma::small_range_reduction; +using __llvm_libc::fma::SmallExcepts; +#else +#include "range_reduction.h" +// using namespace __llvm_libc::generic; +using __llvm_libc::generic::FAST_PASS_BOUND; +using __llvm_libc::generic::large_range_reduction; +using __llvm_libc::generic::LargeExcepts; +using __llvm_libc::generic::N_EXCEPT_LARGE; +using __llvm_libc::generic::N_EXCEPT_SMALL; +using __llvm_libc::generic::small_range_reduction; +using __llvm_libc::generic::SmallExcepts; +#endif namespace __llvm_libc { -// Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative -// error is 0.5303 * 2^-23. A single-step range reduction is used for -// small values. Large inputs have their range reduced using fast integer -// arithmetic. -LLVM_LIBC_FUNCTION(float, sinf, (float y)) { - double x = y; - double s; - int n; - const sincos_t *p = &SINCOSF_TABLE[0]; - - if (abstop12(y) < abstop12(PIO4)) { - s = x * x; - - if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) { - if (unlikely(abstop12(y) < abstop12(as_float(0x800000)))) - // Force underflow for tiny y. - force_eval(s); - return y; +LLVM_LIBC_FUNCTION(float, sinf, (float x)) { + using FPBits = typename fputil::FPBits; + FPBits xbits(x); + + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & 0x7fff'ffffU; + double xd, y; + + // Range reduction: + // For |x| > pi/16, we perform range reduction as follows: + // Find k and y such that: + // x = (k + y) * pi + // k is an integer + // |y| < 0.5 + // For small range (|x| < 2^50 when FMA instructions are available, 2^26 + // otherwise), this is done by performing: + // k = round(x * 1/pi) + // y = x * 1/pi - k + // For large range, to reduce the errors of estimating the integral part of + // (x/pi), we perform the following computation: + // k = round( trunc(x * 256/pi) / 256) + // y = x * 1/pi - k + // Notice that the 2-step rounding will technically putting the bound of y + // to be: |y| < 0.5 + 1/256. + // + // When FMA instructions are not available, we store the digits of 1/pi in + // chunks of 28-bit precision. This will make sure that the products: + // x * ONE_OVER_PI_28[i] are all exact. + // When FMA instructions are available, we simply store the digits of 1/pi in + // chunks of doubles (53-bit of precision). + // So when multiplying by the largest values of single precision, the + // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the + // worst-case analysis of range reduction, |y| >= 2^-38, so this should give + // us more than 40 bits of accuracy. For the worst-case estimation of range + // reduction, see for instances: + // Elementary Functions by J-M. Muller, Chapter 11, + // Handbook of Floating-Point Arithmetic by J-M. Muller et. al., + // Chapter 10.2. + // + // Once k and y are computed, we then deduce the answer by the sine of sum + // formula: + // sin(x) = sin((k + y)*pi) + // = sin(y*pi) * cos(k*pi) + cos(y*pi) * sin(k*pi) + // = (-1)^(k & 1) * sin(y*pi) + // ~ (-1)^(k & 1) * y * P(y^2) + // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya + // with: > Q = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], + // [|D...|], [0, 0.5]); + + // |x| <= pi/16 + if (x_abs <= 0x3e49'0fdbU) { + xd = static_cast(x); + + // |x| < 0x1.d12ed2p-12f + if (x_abs < 0x39e8'9769U) { + if (unlikely(x_abs == 0U)) { + // For signed zeros. + return x; + } + // When |x| < 2^-12, the relative error of the approximation sin(x) ~ x + // is: + // |sin(x) - x| / |sin(x)| < |x^3| / (6|x|) + // = x^2 / 6 + // < 2^-25 + // < epsilon(1)/2. + // So the correctly rounded values of sin(x) are: + // = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO, + // or (rounding mode = FE_UPWARD and x is + // negative), + // = x otherwise. + // To simplify the rounding decision and make it more efficient, we use + // fma(x, -2^-25, x) instead. + // An exhaustive test shows that this formula work correctly for all + // rounding modes up to |x| < 0x1.c555dep-11f. + // Note: to use the formula x - 2^-25*x to decide the correct rounding, we + // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when + // |x| < 2^-125. For targets without FMA instructions, we simply use + // double for intermediate results as it is more efficient than using an + // emulated version of FMA. +#if defined(LIBC_TARGET_HAS_FMA) + return fputil::multiply_add(x, -0x1.0p-25f, x); +#else + return static_cast(fputil::multiply_add(xd, -0x1.0p-25, xd)); +#endif // LIBC_TARGET_HAS_FMA } - return sinf_poly(x, s, p, 0); - } else if (likely(abstop12(y) < abstop12(120.0f))) { - x = reduce_fast(x, p, &n); - - // Setup the signs for sin and cos. - s = p->sign[n & 3]; + // |x| < pi/16. + double xsq = xd * xd; + + // Degree-9 polynomial approximation: + // sin(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9 + // = x (1 + a_3 x^2 + ... + a_9 x^8) + // = x * P(x^2) + // generated by Sollya with the following commands: + // > display = hexadecimal; + // > Q = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]); + double result = + fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7, + -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19); + return xd * result; + } - if (n & 2) - p = &SINCOSF_TABLE[1]; + bool x_sign = xbits.get_sign(); - return sinf_poly(x * s, x * x, p, n); - } else if (abstop12(y) < abstop12(INFINITY)) { - uint32_t xi = as_uint32_bits(y); - int sign = xi >> 31; + int64_t k; + xd = static_cast(x); - x = reduce_large(xi, &n); + if (x_abs < FAST_PASS_BOUND) { + using ExceptChecker = + typename fputil::ExceptionChecker; + if (float result; + ExceptChecker::check_odd_func(SmallExcepts, x_abs, x_sign, result)) { + return result; + } - // Setup signs for sin and cos - include original sign. - s = p->sign[(n + sign) & 3]; + k = small_range_reduction(xd, y); + } else { + // x is inf or nan. + if (unlikely(x_abs >= 0x7f80'0000U)) { + if (x_abs == 0x7f80'0000U) + errno = EDOM; + return x + + FPBits::build_nan(1 << (fputil::MantissaWidth::VALUE - 1)); + } - if ((n + sign) & 2) - p = &SINCOSF_TABLE[1]; + using ExceptChecker = + typename fputil::ExceptionChecker; + if (float result; + ExceptChecker::check_odd_func(LargeExcepts, x_abs, x_sign, result)) + return result; - return sinf_poly(x * s, x * x, p, n); + k = large_range_reduction(xd, xbits.get_exponent(), y); } - return invalid(y); + // After range reduction, k = round(x / pi) and y = (x/pi) - k. + // So k is an integer and -0.5 <= y <= 0.5. + // Then sin(x) = sin(y*pi + k*pi) + // = (-1)^(k & 1) * sin(y*pi) + // ~ (-1)^(k & 1) * y * P(y^2) + // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya + // with: > P = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], + // [|D...|], [0, 0.5]); + + constexpr double SIGN[2] = {1.0, -1.0}; + + double ysq = y * y; + double result = + y * fputil::polyeval(ysq, 0x1.921fb54442d17p1, -0x1.4abbce625bd4bp2, + 0x1.466bc67750a3fp1, -0x1.32d2cce1612b5p-1, + 0x1.507832417bce6p-4, -0x1.e3062119b6071p-8, + 0x1.e89c7aa14122dp-12, -0x1.625b1709dece6p-16); + + return SIGN[k & 1] * result; + // } } } // namespace __llvm_libc diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -23,15 +23,19 @@ add_fp_unittest( sinf_test + NO_RUN_POSTBUILD NEED_MPFR SUITE libc_math_exhaustive_tests SRCS sinf_test.cpp DEPENDS + .exhaustive_test libc.include.math libc.src.math.sinf libc.src.__support.FPUtil.fputil + LINK_LIBRARIES + -lpthread ) add_fp_unittest( diff --git a/libc/test/src/math/exhaustive/sinf_test.cpp b/libc/test/src/math/exhaustive/sinf_test.cpp --- a/libc/test/src/math/exhaustive/sinf_test.cpp +++ b/libc/test/src/math/exhaustive/sinf_test.cpp @@ -6,20 +6,71 @@ // //===----------------------------------------------------------------------===// +#include "exhaustive_test.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/sinf.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include +#include "utils/UnitTest/FPMatcher.h" + +#include using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; -TEST(LlvmLibcsinffExhaustiveTest, AllValues) { - uint32_t bits = 0; - do { - FPBits xbits(bits); - float x = float(xbits); - ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0); - } while (bits++ < 0xffff'ffffU); +struct LlvmLibcSinfExhaustiveTest : public LlvmLibcExhaustiveTest { + bool check(uint32_t start, uint32_t stop, + mpfr::RoundingMode rounding) override { + mpfr::ForceRoundingMode r(rounding); + uint32_t bits = start; + bool result = true; + do { + FPBits xbits(bits); + float x = float(xbits); + bool r = EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), + 0.5, rounding); + result &= r; + } while (++bits < stop); + return result; + } +}; + +// Range: [0, +Inf); +static constexpr uint32_t POS_START = 0x0000'0000U; +static constexpr uint32_t POS_STOP = 0x7f80'0000U; + +TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundNearestTieToEven) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest); +} + +TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundUp) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward); +} + +TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundDown) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward); +} + +TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundTowardZero) { + test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero); +} + +// Range: (-Inf, 0]; +static constexpr uint32_t NEG_START = 0x8000'0000U; +static constexpr uint32_t NEG_STOP = 0xff80'0000U; + +TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundNearestTieToEven) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest); +} + +TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundUp) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward); +} + +TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundDown) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward); +} + +TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundTowardZero) { + test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero); } diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp --- a/libc/test/src/math/sinf_test.cpp +++ b/libc/test/src/math/sinf_test.cpp @@ -51,26 +51,70 @@ float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, + __llvm_libc::sinf(x), 0.5); } } TEST(LlvmLibcSinfTest, SpecificBitPatterns) { - float x = float(FPBits(uint32_t(0xc70d39a1))); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0); + constexpr int N = 36; + constexpr uint32_t INPUTS[N] = { + 0x3f06'0a92U, // x = pi/6 + 0x3f3a'dc51U, // x = 0x1.75b8a2p-1f + 0x3f49'0fdbU, // x = pi/4 + 0x3f86'0a92U, // x = pi/3 + 0x3fa7'832aU, // x = 0x1.4f0654p+0f + 0x3fc9'0fdbU, // x = pi/2 + 0x4017'1973U, // x = 0x1.2e32e6p+1f + 0x4049'0fdbU, // x = pi + 0x4096'cbe4U, // x = 0x1.2d97c8p+2f + 0x40c9'0fdbU, // x = 2*pi + 0x433b'7490U, // x = 0x1.76e92p+7f + 0x437c'e5f1U, // x = 0x1.f9cbe2p+7f + 0x4619'9998U, // x = 0x1.33333p+13f + 0x474d'246fU, // x = 0x1.9a48dep+15f + 0x4afd'ece4U, // x = 0x1.fbd9c8p+22f + 0x4c23'32e9U, // x = 0x1.4665d2p+25f + 0x50a3'e87fU, // x = 0x1.47d0fep+34f + 0x5239'47f6U, // x = 0x1.728fecp+37f + 0x53b1'46a6U, // x = 0x1.628d4cp+40f + 0x55ca'fb2aU, // x = 0x1.95f654p+44f + 0x588e'f060U, // x = 0x1.1de0cp+50f + 0x5c07'bcd0U, // x = 0x1.0f79ap+57f + 0x5ebc'fddeU, // x = 0x1.79fbbcp+62f + 0x5fa6'eba7U, // x = 0x1.4dd74ep+64f + 0x61a4'0b40U, // x = 0x1.48168p+68f + 0x6386'134eU, // x = 0x1.0c269cp+72f + 0x6589'8498U, // x = 0x1.13093p+76f + 0x6600'0001U, // x = 0x1.000002p+77f + 0x664e'46e4U, // x = 0x1.9c8dc8p+77f + 0x66b0'14aaU, // x = 0x1.602954p+78f + 0x67a9'242bU, // x = 0x1.524856p+80f + 0x6a19'76f1U, // x = 0x1.32ede2p+85f + 0x6c55'da58U, // x = 0x1.abb4bp+89f + 0x6f79'be45U, // x = 0x1.f37c8ap+95f + 0x7276'69d4U, // x = 0x1.ecd3a8p+101f + 0x7758'4625U, // x = 0x1.b08c4ap+111f + }; + + for (int i = 0; i < N; ++i) { + float x = float(FPBits(INPUTS[i])); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, + __llvm_libc::sinf(x), 0.5); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, -x, + __llvm_libc::sinf(-x), 0.5); + } } // For small values, sin(x) is x. TEST(LlvmLibcSinfTest, SmallValues) { - float x = float(FPBits(uint32_t(0x17800000))); - float result = __llvm_libc::sinf(x); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, 1.0); - EXPECT_FP_EQ(x, result); - - x = float(FPBits(uint32_t(0x00400000))); - result = __llvm_libc::sinf(x); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, 1.0); - EXPECT_FP_EQ(x, result); + float x = float(FPBits(0x1780'0000U)); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), + 0.5); + + x = float(FPBits(0x0040'0000U)); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), + 0.5); } // SDCOMP-26094: check sinf in the cases for which the range reducer @@ -78,6 +122,7 @@ TEST(LlvmLibcSinfTest, SDCOMP_26094) { for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits((v))); - EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, + __llvm_libc::sinf(x), 0.5); } } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -161,6 +161,7 @@ "src/__support/FPUtil/NormalFloat.h", "src/__support/FPUtil/PlatformDefs.h", "src/__support/FPUtil/builtin_wrappers.h", + "src/__support/FPUtil/except_value_utils.h", ] fputil_hdrs = selects.with_or({ @@ -263,6 +264,28 @@ ], ) +nearest_integer_common_hdrs = [ + "src/__support/FPUtil/nearest_integer.h", +] + +nearest_integer_platform_hdrs = [ + "src/__support/FPUtil/x86_64/nearest_integer.h", + "src/__support/FPUtil/aarch64/nearest_integer.h", +] + +cc_library( + name = "__support_fputil_nearest_integer", + hdrs = nearest_integer_common_hdrs, + # These are conditionally included and will #error out if the platform + # doesn't support rounding instructions, so they can't be compiled on their + # own. + textual_hdrs = nearest_integer_platform_hdrs, + deps = [ + ":__support_common", + ":libc_root", + ], +) + ################################ fenv targets ################################ libc_function( @@ -612,9 +635,14 @@ libc_math_function( name = "sinf", + hdrs = [ + "src/math/generic/range_reduction.h", + "src/math/generic/range_reduction_fma.h", + ], additional_deps = [ - ":math_utils", - ":sincosf_utils", + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_polyeval", ], )