diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -992,3 +992,15 @@ COMPILE_OPTIONS -O2 ) + +add_object_library( + dp_trig + SRCS + dp_trig.cpp + HDRS + dp_trig.h + DEPENDS + libc.utils.FPUtil.fputil + COMPILE_OPTIONS + -O3 +) diff --git a/libc/src/math/generic/dp_trig.h b/libc/src/math/generic/dp_trig.h new file mode 100644 --- /dev/null +++ b/libc/src/math/generic/dp_trig.h @@ -0,0 +1,22 @@ +//===-- Utilities for double precision trigonometric functions --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H +#define LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H + +namespace __llvm_libc { + +double mod_2pi(double); + +double mod_pi_over_2(double); + +double mod_pi_over_4(double); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H diff --git a/libc/src/math/generic/dp_trig.cpp b/libc/src/math/generic/dp_trig.cpp new file mode 100644 --- /dev/null +++ b/libc/src/math/generic/dp_trig.cpp @@ -0,0 +1,105 @@ +//===-- Utilities for double precision trigonometric functions ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "utils/FPUtil/FMA.h" +#include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/ManipulationFunctions.h" +#include "utils/FPUtil/XFloat.h" + +using FPBits = __llvm_libc::fputil::FPBits; + +namespace __llvm_libc { + +// Implementation is based on the Payne and Hanek range reduction algorithm. +// The caller should ensure that x is positive. +// Consider: +// x/y = x * 1/y = I + F +// I is the integral part and F the fractional part of the result of the +// division operation. Then M = mod(x, y) = F * y. In order to compute M, we +// first compute F. We do it by dropping bits from 1/y which would only +// contribute integral results in the operation x * 1/y. This helps us get +// accurate values of F even when x is a very large number. +// +// Internal operations are performed at 192 bits of precision. +static double mod_impl(double x, const uint64_t y_bits[3], + const uint64_t inv_y_bits[20], int y_exponent, + int inv_y_exponent) { + FPBits bits(x); + int exponent = bits.getExponent(); + int bit_drop = (exponent - 52) + inv_y_exponent + 1; + bit_drop = bit_drop >= 0 ? bit_drop : 0; + int word_drop = bit_drop / 64; + bit_drop %= 64; + fputil::UInt<256> man4; + for (size_t i = 0; i < 4; ++i) { + man4[3 - i] = inv_y_bits[word_drop + i]; + } + man4.shift_left(bit_drop); + fputil::UInt<192> man_bits; + for (size_t i = 0; i < 3; ++i) + man_bits[i] = man4[i + 1]; + fputil::XFloat<192> result(inv_y_exponent - word_drop * 64 - bit_drop, + man_bits); + result.mul(x); + result.drop_int(); // |result| now holds fractional part of x/y. + + fputil::UInt<192> y_man; + for (size_t i = 0; i < 3; ++i) + y_man[i] = y_bits[2 - i]; + fputil::XFloat<192> y_192(y_exponent, y_man); + return result.mul(y_192); +} + +static constexpr int TwoPIExponent = 2; + +// The mantissa bits of 2 * PI. +// The most signification bits are in the first uint64_t word +// and the least signification bits are in the last word. The +// first word includes the implicit '1' bit. +static constexpr uint64_t TwoPI[] = {0xc90fdaa22168c234, 0xc4c6628b80dc1cd1, + 0x29024e088a67cc74}; + +static constexpr int InvTwoPIExponent = -3; + +// The mantissa bits of 1/(2 * PI). +// The most signification bits are in the first uint64_t word +// and the least signification bits are in the last word. The +// first word includes the implicit '1' bit. +static constexpr uint64_t InvTwoPI[] = { + 0xa2f9836e4e441529, 0xfc2757d1f534ddc0, 0xdb6295993c439041, + 0xfe5163abdebbc561, 0xb7246e3a424dd2e0, 0x6492eea09d1921c, + 0xfe1deb1cb129a73e, 0xe88235f52ebb4484, 0xe99c7026b45f7e41, + 0x3991d639835339f4, 0x9c845f8bbdf9283b, 0x1ff897ffde05980f, + 0xef2f118b5a0a6d1f, 0x6d367ecf27cb09b7, 0x4f463f669e5fea2d, + 0x7527bac7ebe5f17b, 0x3d0739f78a5292ea, 0x6bfb5fb11f8d5d08, + 0x56033046fc7b6bab, 0xf0cfbc209af4361e}; + +double mod_2pi(double x) { + static constexpr double _2pi = 6.283185307179586; + if (x < _2pi) + return x; + return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent, InvTwoPIExponent); +} + +// Returns mod(x, pi/2) +double mod_pi_over_2(double x) { + static constexpr double pi_over_2 = 1.5707963267948966; + if (x < pi_over_2) + return x; + return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 2, InvTwoPIExponent + 2); +} + +// Returns mod(x, pi/4) +double mod_pi_over_4(double x) { + static constexpr double pi_over_4 = 0.7853981633974483; + if (x < pi_over_4) + return x; + return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 3, InvTwoPIExponent + 3); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1182,6 +1182,18 @@ libc.utils.FPUtil.fputil ) +add_fp_unittest( + mod_k_pi_test + NEED_MPFR + SUITE + libc-long-running-tests + SRCS + mod_k_pi_test.cpp + DEPENDS + libc.src.math.generic.dp_trig + libc.utils.FPUtil.fputil +) + add_subdirectory(generic) add_subdirectory(exhaustive) add_subdirectory(differential_testing) diff --git a/libc/test/src/math/mod_k_pi_test.cpp b/libc/test/src/math/mod_k_pi_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/math/mod_k_pi_test.cpp @@ -0,0 +1,56 @@ +//===-- Unittests mod_2pi, mod_pi_over_4 and mod_pi_over_2 ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/generic/dp_trig.h" +#include "utils/FPUtil/TestHelpers.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +#include + +namespace mpfr = __llvm_libc::testing::mpfr; +using FPBits = __llvm_libc::fputil::FPBits; +using UIntType = FPBits::UIntType; + +TEST(LlvmLibcMod2PITest, Range) { + constexpr UIntType count = 1000000000; + constexpr UIntType step = UIntType(-1) / count; + for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + double x = double(FPBits(v)); + if (isnan(x) || isinf(x) || x <= 0.0) + continue; + + ASSERT_MPFR_MATCH(mpfr::Operation::Mod2PI, x, __llvm_libc::mod_2pi(x), 0); + } +} + +TEST(LlvmLibcModPIOver2Test, Range) { + constexpr UIntType count = 1000000000; + constexpr UIntType step = UIntType(-1) / count; + for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + double x = double(FPBits(v)); + if (isnan(x) || isinf(x) || x <= 0.0) + continue; + + ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver2, x, + __llvm_libc::mod_pi_over_2(x), 0); + } +} + +TEST(LlvmLibcModPIOver4Test, Range) { + constexpr UIntType count = 1000000000; + constexpr UIntType step = UIntType(-1) / count; + for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + double x = double(FPBits(v)); + if (isnan(x) || isinf(x) || x <= 0.0) + continue; + + ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver4, x, + __llvm_libc::mod_pi_over_4(x), 0); + } +} diff --git a/libc/utils/FPUtil/UInt.h b/libc/utils/FPUtil/UInt.h new file mode 100644 --- /dev/null +++ b/libc/utils/FPUtil/UInt.h @@ -0,0 +1,236 @@ +//===-- A class to manipulate wide integers. --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UTILS_FPUTIL_UINT_H +#define LLVM_LIBC_UTILS_FPUTIL_UINT_H + +#include // For size_t +#include + +namespace __llvm_libc { +namespace fputil { + +template class UInt { + + // This is mainly used for debugging. + enum Kind { + NotANumber, + Valid, + }; + + static_assert(Bits > 0 && Bits % 64 == 0, + "Number of bits in UInt should be a multiple of 64."); + static constexpr uint64_t Mask32 = 0xFFFFFFFF; + static constexpr size_t WordCount = Bits / 64; + static constexpr uint64_t InvalidHexDigit = 20; + uint64_t val[WordCount]; + Kind kind; + + uint64_t low(uint64_t v) { return v & Mask32; } + + uint64_t high(uint64_t v) { return (v >> 32) & Mask32; } + + uint64_t hexval(char c) { + uint64_t diff; + if ((diff = uint64_t(c) - 'A') < 6) + return diff + 10; + else if ((diff = uint64_t(c) - 'a') < 6) + return diff + 10; + else if ((diff = uint64_t(c) - '0') < 10) + return diff; + else + return InvalidHexDigit; + } + + size_t strlen(const char *s) { + size_t len; + for (len = 0; *s != '\0'; ++s, ++len) + ; + return len; + } + +public: + UInt() { kind = Valid; } + + UInt(const UInt &other) : kind(other.kind) { + if (kind == Valid) { + for (size_t i = 0; i < WordCount; ++i) + val[i] = other.val[i]; + } + } + + // This constructor is used for debugging. + explicit UInt(const char *s) { + size_t len = strlen(s); + if (len > Bits / 4 + 2 || len < 3) { + kind = NotANumber; + return; + } + + if (!(s[0] == '0' && s[1] == 'x')) { + kind = NotANumber; + return; + } + + for (size_t i = 0; i < WordCount; ++i) + val[i] = 0; + + for (size_t i = len - 1, w = 0; i >= 2; --i, w += 4) { + uint64_t hex = hexval(s[i]); + if (hex == InvalidHexDigit) { + kind = NotANumber; + return; + } + val[w / 64] |= (hex << (w % 64)); + } + + kind = Valid; + } + + explicit UInt(uint64_t v) { + val[0] = v; + for (size_t i = 1; i < WordCount; ++i) + val[i] = 0; + kind = Valid; + } + + explicit UInt(uint64_t data[WordCount]) { + for (size_t i = 0; i < WordCount; ++i) + val[i] = data[i]; + kind = Valid; + } + + bool is_valid() const { return kind == Valid; } + + // Add x to this number and store the result in this number. + // Returns the carry value produced by the addition operation. + uint64_t add(const UInt &x) { + uint64_t carry = 0; + for (size_t i = 0; i < WordCount; ++i) { + uint64_t res_lo = low(val[i]) + low(x.val[i]) + carry; + carry = high(res_lo); + res_lo = low(res_lo); + + uint64_t res_hi = high(val[i]) + high(x.val[i]) + carry; + carry = high(res_hi); + res_hi = low(res_hi); + + val[i] = res_lo + (res_hi << 32); + } + return carry; + } + + // Multiply this number with x and store the result in this number. It is + // implemented using the long multiplication algorithm by splitting the + // 64-bit words of this number and |x| in to 32-bit halves but peforming + // the operations using 64-bit numbers. This ensures that we don't lose the + // carry bits. + // Returns the carry value produced by the multiplication operation. + uint64_t mul(uint64_t x) { + uint64_t x_lo = low(x); + uint64_t x_hi = high(x); + + uint64_t row1[WordCount + 1]; + uint64_t carry = 0; + for (size_t i = 0; i < WordCount; ++i) { + uint64_t l = low(val[i]); + uint64_t h = high(val[i]); + uint64_t p1 = x_lo * l; + uint64_t p2 = x_lo * h; + + uint64_t res_lo = low(p1) + carry; + carry = high(res_lo); + uint64_t res_hi = high(p1) + low(p2) + carry; + carry = high(res_hi) + high(p2); + + res_lo = low(res_lo); + res_hi = low(res_hi); + row1[i] = res_lo + (res_hi << 32); + } + row1[WordCount] = carry; + + uint64_t row2[WordCount + 1]; + row2[0] = 0; + carry = 0; + for (size_t i = 0; i < WordCount; ++i) { + uint64_t l = low(val[i]); + uint64_t h = high(val[i]); + uint64_t p1 = x_hi * l; + uint64_t p2 = x_hi * h; + + uint64_t res_lo = low(p1) + carry; + carry = high(res_lo); + uint64_t res_hi = high(p1) + low(p2) + carry; + carry = high(res_hi) + high(p2); + + res_lo = low(res_lo); + res_hi = low(res_hi); + row2[i] = res_lo + (res_hi << 32); + } + row2[WordCount] = carry; + + UInt<(WordCount + 1) * 64> r1(row1), r2(row2); + r2.shift_left(32); + r1.add(r2); + for (size_t i = 0; i < WordCount; ++i) { + val[i] = r1[i]; + } + return r1[WordCount]; + } + + void shift_left(size_t s) { + const size_t drop = s / 64; // Number of words to drop + const size_t shift = s % 64; // Bits to shift in the remaining words. + const uint64_t mask = ((uint64_t(1) << shift) - 1) << (64 - shift); + + for (size_t i = WordCount; drop > 0 && i > 0; --i) { + if (i - drop > 0) + val[i - 1] = val[i - drop - 1]; + else + val[i - 1] = 0; + } + for (size_t i = WordCount; shift > 0 && i > drop; --i) { + uint64_t drop_val = (val[i - 1] & mask) >> (64 - shift); + val[i - 1] <<= shift; + if (i < WordCount) + val[i] |= drop_val; + } + } + + void shift_right(size_t s) { + const size_t drop = s / 64; // Number of words to drop + const size_t shift = s % 64; // Bit shift in the remaining words. + const uint64_t mask = (uint64_t(1) << shift) - 1; + + for (size_t i = 0; drop > 0 && i < WordCount; ++i) { + if (i + drop < WordCount) + val[i] = val[i + drop]; + else + val[i] = 0; + } + for (size_t i = 0; shift > 0 && i < WordCount; ++i) { + uint64_t drop_val = ((val[i] & mask) << (64 - shift)); + val[i] >>= shift; + if (i > 0) + val[i - 1] |= drop_val; + } + } + + const uint64_t &operator[](size_t i) const { return val[i]; } + + uint64_t &operator[](size_t i) { return val[i]; } + + uint64_t *data() { return val; } + + const uint64_t *data() const { return val; } +}; + +} // namespace fputil +} // namespace __llvm_libc + +#endif // LLVM_LIBC_UTILS_FPUTIL_UINT_H diff --git a/libc/utils/FPUtil/XFloat.h b/libc/utils/FPUtil/XFloat.h new file mode 100644 --- /dev/null +++ b/libc/utils/FPUtil/XFloat.h @@ -0,0 +1,180 @@ +//===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FPBits.h" +#include "NormalFloat.h" +#include "UInt.h" + +#include + +namespace __llvm_libc { +namespace fputil { + +// Store and manipulate positive double precision numbers at |Precision| bits. +template class XFloat { + static constexpr uint64_t OneMask = (uint64_t(1) << 63); + UInt man; + static constexpr uint64_t WordCount = Precision / 64; + int exp; + + size_t bit_width(uint64_t x) { + if (x == 0) + return 0; + size_t shift = 0; + while ((OneMask & x) == 0) { + ++shift; + x <<= 1; + } + return 64 - shift; + } + +public: + XFloat() : exp(0) { + for (int i = 0; i < WordCount; ++i) + man[i] = 0; + } + + XFloat(const XFloat &other) : exp(other.exp) { + for (int i = 0; i < WordCount; ++i) + man[i] = other.man[i]; + } + + explicit XFloat(double x) { + auto xn = NormalFloat(x); + exp = xn.exponent; + man[WordCount - 1] = xn.mantissa << 11; + for (int i = 0; i < WordCount - 1; ++i) + man[i] = 0; + } + + XFloat(int e, const UInt &bits) : exp(e) { + for (size_t i = 0; i < WordCount; ++i) + man[i] = bits[i]; + } + + // Multiply this number with x and store the result in this number. + void mul(double x) { + auto xn = NormalFloat(x); + exp += xn.exponent; + uint64_t carry = man.mul(xn.mantissa << 11); + size_t carry_width = bit_width(carry); + carry_width = (carry_width == 64 ? 64 : 63); + man.shift_right(carry_width); + man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width)); + exp += carry_width == 64 ? 1 : 0; + normalize(); + } + + void drop_int() { + if (exp < 0) + return; + if (exp > int(Precision - 1)) { + for (size_t i = 0; i < WordCount; ++i) + man[i] = 0; + return; + } + + man.shift_left(exp + 1); + man.shift_right(exp + 1); + + normalize(); + } + + double mul(const XFloat &other) { + constexpr size_t row_words = 2 * WordCount + 1; + constexpr size_t row_precision = row_words * 64; + constexpr size_t result_bits = 2 * Precision; + UInt rows[WordCount]; + + for (size_t r = 0; r < WordCount; ++r) { + for (size_t i = 0; i < row_words; ++i) { + if (i < WordCount) + rows[r][i] = man[i]; + else + rows[r][i] = 0; + } + rows[r].mul(other.man[r]); + rows[r].shift_left(r * 64); + } + + for (size_t r = 1; r < WordCount; ++r) { + rows[0].add(rows[r]); + } + int result_exp = exp + other.exp; + uint64_t carry = rows[0][row_words - 1]; + if (carry) { + size_t carry_width = bit_width(carry); + rows[0].shift_right(carry_width); + rows[0][row_words - 2] = + rows[0][row_words - 2] + (carry << (64 - carry_width)); + result_exp += carry_width; + } + + if (rows[0][row_words - 2] & OneMask) { + ++result_exp; + } else { + rows[0].shift_left(1); + } + + UInt result_man; + for (size_t i = 0; i < result_bits / 64; ++i) + result_man[i] = rows[0][i]; + XFloat result(result_exp, result_man); + result.normalize(); + return double(result); + } + + explicit operator double() { + normalize(); + + constexpr uint64_t one = uint64_t(1) << 10; + constexpr uint64_t excess_mask = (one << 1) - 1; + uint64_t excess = man[WordCount - 1] & excess_mask; + uint64_t new_man = man[WordCount - 1] >> 11; + if (excess > one) { + // We have to round up. + ++new_man; + } else if (excess == one) { + bool greater_than_one = false; + for (size_t i = 0; i < WordCount - 1; ++i) { + greater_than_one = (man[i] != 0); + if (greater_than_one) + break; + } + if (greater_than_one || (new_man & 1) != 0) { + ++new_man; + } + } + + if (new_man == (uint64_t(1) << 53)) + ++exp; + + // We use NormalFloat as it can produce subnormal numbers or underflow to 0 + // if necessary. + NormalFloat d(exp, new_man, 0); + return double(d); + } + + // Normalizes this number. + void normalize() { + uint64_t man_bits = 0; + for (size_t i = 0; i < WordCount; ++i) + man_bits |= man[i]; + + if (man_bits == 0) + return; + + while ((man[WordCount - 1] & OneMask) == 0) { + man.shift_left(1); + --exp; + } + } +}; + +} // namespace fputil +} // namespace __llvm_libc diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -30,6 +30,9 @@ Exp2, Expm1, Floor, + Mod2PI, + ModPIOver2, + ModPIOver4, Round, Sin, Sqrt, diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -61,7 +61,7 @@ mpfr_t value; public: - MPFRNumber() : mpfrPrecision(128) { mpfr_init2(value, mpfrPrecision); } + MPFRNumber() : mpfrPrecision(256) { mpfr_init2(value, mpfrPrecision); } // We use explicit EnableIf specializations to disallow implicit // conversions. Implicit conversions can potentially lead to loss of @@ -201,6 +201,33 @@ return result; } + MPFRNumber mod_2pi() const { + MPFRNumber result(0.0, 1280); + MPFRNumber _2pi(0.0, 1280); + mpfr_const_pi(_2pi.value, MPFR_RNDN); + mpfr_mul_si(_2pi.value, _2pi.value, 2, MPFR_RNDN); + mpfr_fmod(result.value, value, _2pi.value, MPFR_RNDN); + return result; + } + + MPFRNumber mod_pi_over_2() const { + MPFRNumber result(0.0, 1280); + MPFRNumber pi_over_2(0.0, 1280); + mpfr_const_pi(pi_over_2.value, MPFR_RNDN); + mpfr_mul_d(pi_over_2.value, pi_over_2.value, 0.5, MPFR_RNDN); + mpfr_fmod(result.value, value, pi_over_2.value, MPFR_RNDN); + return result; + } + + MPFRNumber mod_pi_over_4() const { + MPFRNumber result(0.0, 1280); + MPFRNumber pi_over_4(0.0, 1280); + mpfr_const_pi(pi_over_4.value, MPFR_RNDN); + mpfr_mul_d(pi_over_4.value, pi_over_4.value, 0.25, MPFR_RNDN); + mpfr_fmod(result.value, value, pi_over_4.value, MPFR_RNDN); + return result; + } + MPFRNumber sin() const { MPFRNumber result; mpfr_sin(result.value, value, MPFR_RNDN); @@ -325,6 +352,12 @@ return mpfrInput.expm1(); case Operation::Floor: return mpfrInput.floor(); + case Operation::Mod2PI: + return mpfrInput.mod_2pi(); + case Operation::ModPIOver2: + return mpfrInput.mod_pi_over_2(); + case Operation::ModPIOver4: + return mpfrInput.mod_pi_over_4(); case Operation::Round: return mpfrInput.round(); case Operation::Sin: