diff --git a/libc/src/__support/macros/properties/CMakeLists.txt b/libc/src/__support/macros/properties/CMakeLists.txt --- a/libc/src/__support/macros/properties/CMakeLists.txt +++ b/libc/src/__support/macros/properties/CMakeLists.txt @@ -1,17 +1,19 @@ add_header_library( - architectures - HDRS + architectures + HDRS architectures.h ) add_header_library( - compiler - HDRS + compiler + HDRS compiler.h ) add_header_library( - cpu_features - HDRS + cpu_features + HDRS cpu_features.h + DEPENDS + .architectures ) diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h --- a/libc/src/__support/macros/properties/cpu_features.h +++ b/libc/src/__support/macros/properties/cpu_features.h @@ -12,6 +12,8 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H #define LLVM_LIBC_SRC_SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H +#include "architectures.h" + #if defined(__SSE2__) #define LIBC_TARGET_CPU_HAS_SSE2 #endif @@ -41,4 +43,10 @@ #define LIBC_TARGET_CPU_HAS_FMA #endif +#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || \ + (defined(LIBC_TARGET_ARCH_IS_X86_64) && \ + defined(LIBC_TARGET_CPU_HAS_SSE4_2)) +#define LIBC_TARGET_CPU_HAS_NEAREST_INT +#endif + #endif // LLVM_LIBC_SRC_SUPPORT_MACROS_PROPERTIES_CPU_FEATURES_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1374,6 +1374,8 @@ .explogxf libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 diff --git a/libc/src/math/generic/tanhf.cpp b/libc/src/math/generic/tanhf.cpp --- a/libc/src/math/generic/tanhf.cpp +++ b/libc/src/math/generic/tanhf.cpp @@ -8,66 +8,114 @@ #include "src/math/tanhf.h" #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" #include "src/math/generic/explogxf.h" namespace __llvm_libc { +// 2^6 * log2(e) +constexpr double LOG2_E_EXP2_6 = ExpBase::LOG2_B * 2.0; + LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); - bool sign = xbits.get_sign(); - uint32_t x_abs = xbits.uintval() & FPBits::FloatProp::EXP_MANT_MASK; + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & FPBits::FloatProp::EXP_MANT_MASK; - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast( - LIBC_UNLIKELY(x_abs == 0) ? x : (x - 0x1.5555555555555p-2 * x * x * x)); - } + // When |x| >= 15, or x is inf or nan, or |x| <= 0.078125 + if (LIBC_UNLIKELY((x_abs >= 0x4170'0000U) || (x_abs <= 0x3da0'0000U))) { + if (x_abs <= 0x3da0'0000U) { + // |x| <= 0.078125 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + // |x| <= 2^-26 + return (x_abs != 0) + ? static_cast(x - 0x1.5555555555555p-2 * x * x * x) + : x; + } + + const double TAYLOR[] = {-0x1.5555555555555p-2, 0x1.1111111111111p-3, + -0x1.ba1ba1ba1ba1cp-5, 0x1.664f4882c10fap-6, + -0x1.226e355e6c23dp-7}; + double xdbl = x; + double x2 = xdbl * xdbl; + // Taylor polynomial. + double x4 = x2 * x2; + double c0 = x2 * TAYLOR[0]; + double c1 = fputil::multiply_add(x2, TAYLOR[2], TAYLOR[1]); + double c2 = fputil::multiply_add(x2, TAYLOR[4], TAYLOR[3]); + double pe = fputil::polyeval(x4, c0, c1, c2); - // When |x| >= 15, or x is inf or nan - if (LIBC_UNLIKELY(x_abs >= 0x4170'0000U)) { - if (xbits.is_nan()) + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); + } + + // |x| >= 15 + if (LIBC_UNLIKELY(xbits.is_nan())) return x + 1.0f; // sNaN to qNaN + signal - if (xbits.is_inf()) - return sign ? -1.0f : 1.0f; + const double SIGNS[2][2] = {{1.0f, -0x1.0p-25f}, {-1.0f, 0x1.0p-25f}}; - if (sign) { - return -1.0f + opt_barrier(FPBits(FPBits::MIN_NORMAL).get_val()); - } else - return 1.0f - opt_barrier(FPBits(FPBits::MIN_NORMAL).get_val()); - } + bool sign = xbits.get_sign(); + int idx = static_cast(sign); - // |x| <= 0.078125 - if (LIBC_UNLIKELY(x_abs <= 0x3da0'0000U)) { - double xdbl = x; - double x2 = xdbl * xdbl; - // Pure Taylor series. - double pe = fputil::polyeval(x2, 0.0, -0x1.5555555555555p-2, - 0x1.1111111111111p-3, -0x1.ba1ba1ba1ba1cp-5, - 0x1.664f4882c10fap-6, -0x1.226e355e6c23dp-7); - return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); - } + if (LIBC_UNLIKELY(xbits.is_inf())) + return SIGNS[idx][0]; - if (LIBC_UNLIKELY(xbits.bits == 0x4058'e0a3U)) { - if (fputil::fenv_is_round_down()) - return FPBits(0x3f7f'6ad9U).get_val(); + return SIGNS[idx][0] + SIGNS[idx][1]; } - // Range reduction: e^(2x) = 2^(mid + hi) * e^lo - auto ep = exp_b_range_reduc(2.0f * x); // exp(2 * x) - double r = ExpBase::powb_lo(ep.lo); - // tanh(x) = (exp(2x) - 1) / (exp(2x) + 1) -#if defined(LIBC_TARGET_CPU_HAS_FMA) - return static_cast(fputil::multiply_add(ep.mh, r, -1.0) / - fputil::multiply_add(ep.mh, r, 1.0)); + // Range reduction: e^(2x) = 2^(hi + mid) * e^lo + // Let k = round( x * 2^6 * log2(e)), + // So k = (hi + mid) * 2^5 + // Then lo = 2x - (hi + mid) * log(2) = 2x - k * 2^-5 * log(2). + + double xd = static_cast(x); + // k = round( x* 2^6 * log2(e) ) + double k; + // mk = -k + int mk; +#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT + k = fputil::nearest_integer(xd * LOG2_E_EXP2_6); + mk = -static_cast(k); #else - double exp_x = ep.mh * r; - return static_cast((exp_x - 1.0) / (exp_x + 1.0)); -#endif // LIBC_TARGET_CPU_HAS_FMA + constexpr double HALF_WAY[2] = {-0.5, 0.5}; + + mk = static_cast( + fputil::multiply_add(xd, -LOG2_E_EXP2_6, HALF_WAY[xbits.get_sign()])); + k = static_cast(-mk); +#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT + // -hi = floor(-k * 2^(-MID_BITS)) + // exp_mhi = shift -hi to the exponent field of double precision. + int64_t exp_mhi = static_cast(mk >> ExpBase::MID_BITS) + << fputil::FloatProperties::MANTISSA_WIDTH; + // mh = 2^(-hi - mid) + int64_t mh_bits = ExpBase::EXP_2_MID[mk & ExpBase::MID_MASK] + exp_mhi; + double mh = fputil::FPBits(uint64_t(mh_bits)).get_val(); + // dx = lo/2 = x - (hi + mid) * log(2)/2 = x - k * 2^-6 * log(2) + double dx = fputil::multiply_add( + k, ExpBase::M_LOGB_2_LO * 0.5, + fputil::multiply_add(k, ExpBase::M_LOGB_2_HI * 0.5, xd)); + + // > P = fpminimax(expm1(2*x)/x, 4, [|D...|], [-log(2)/128, log(2)/128]); + constexpr double COEFFS[] = {0x1.ffffffffe5bc8p0, 0x1.555555555cd67p0, + 0x1.5555c2a9b48b4p-1, 0x1.11112a0e34bdbp-2}; + + double dx2 = dx * dx; + double c0 = fputil::multiply_add(dx, 2.0, 1.0); + double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); + double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); + double r = fputil::polyeval(dx2, c0, c1, c2); + + // tanh(x) = sinh(x) / cosh(x) + // = (e^x - e^(-x)) / (e^x + e^(-x)) + // = (e^(2x) - 1) / (e^(2x) + 1) + // = (2^(hi + mid) * e^lo - 1) / (2^(hi + mid) * e^lo + 1) + // = (e^lo - 2^(-hi - mid)) / (e^lo + 2^(-hi - mid)) + // = (r - mh) / (r + mh) + return static_cast((r - mh) / (r + mh)); } } // namespace __llvm_libc diff --git a/libc/test/src/math/tanhf_test.cpp b/libc/test/src/math/tanhf_test.cpp --- a/libc/test/src/math/tanhf_test.cpp +++ b/libc/test/src/math/tanhf_test.cpp @@ -43,35 +43,31 @@ } TEST(LlvmLibcTanhfTest, InFloatRange) { - constexpr uint32_t COUNT = 100'000; + constexpr uint32_t COUNT = 100'001; constexpr uint32_t STEP = UINT32_MAX / COUNT; for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; - ASSERT_MPFR_MATCH(mpfr::Operation::Tanh, x, __llvm_libc::tanhf(x), 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, + __llvm_libc::tanhf(x), 0.5); } } -// For small values, tanh(x) is x. -TEST(LlvmLibcTanhfTest, SmallValues) { - float x = float(FPBits(uint32_t(0x17800000))); - float result = __llvm_libc::tanhf(x); - EXPECT_MPFR_MATCH(mpfr::Operation::Tanh, x, result, 0.5); - EXPECT_FP_EQ(x, result); - - x = float(FPBits(uint32_t(0x00400000))); - result = __llvm_libc::tanhf(x); - EXPECT_MPFR_MATCH(mpfr::Operation::Tanh, x, result, 0.5); - EXPECT_FP_EQ(x, result); -} - TEST(LlvmLibcTanhfTest, ExceptionalValues) { - float x = float(FPBits(uint32_t(0x3a12'85ffU))); - EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, - __llvm_libc::tanhf(x), 0.5); - - x = -float(FPBits(uint32_t(0x3a12'85ffU))); - EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, - __llvm_libc::tanhf(x), 0.5); + constexpr int N = 4; + constexpr uint32_t INPUTS[N] = { + 0x0040'0000, + 0x1780'0000, + 0x3a12'85ff, + 0x4058'e0a3, + }; + + for (int i = 0; i < N; ++i) { + float x = float(FPBits(INPUTS[i])); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, x, + __llvm_libc::tanhf(x), 0.5); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tanh, -x, + __llvm_libc::tanhf(-x), 0.5); + } } diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -81,7 +81,10 @@ libc_support_library( name = "__support_macros_properties_cpu_features", hdrs = ["src/__support/macros/properties/cpu_features.h"], - deps = [":libc_root"], + deps = [ + "__support_macros_properties_architectures", + ":libc_root", + ], ) libc_support_library(