diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -973,8 +973,10 @@ ../hypotf.h DEPENDS libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.sqrt COMPILE_OPTIONS -O3 + -Wno-c++17-extensions ) add_entrypoint_object( diff --git a/libc/src/math/generic/hypotf.cpp b/libc/src/math/generic/hypotf.cpp --- a/libc/src/math/generic/hypotf.cpp +++ b/libc/src/math/generic/hypotf.cpp @@ -6,13 +6,68 @@ // //===----------------------------------------------------------------------===// #include "src/math/hypotf.h" -#include "src/__support/FPUtil/Hypot.h" +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/sqrt.h" #include "src/__support/common.h" namespace __llvm_libc { LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { - return __llvm_libc::fputil::hypot(x, y); + using DoubleBits = fputil::FPBits; + using FPBits = fputil::FPBits; + + FPBits x_bits(x), y_bits(y); + + uint16_t x_exp = x_bits.get_unbiased_exponent(); + uint16_t y_exp = y_bits.get_unbiased_exponent(); + uint16_t exp_diff = (x_exp > y_exp) ? (x_exp - y_exp) : (y_exp - x_exp); + + if (exp_diff >= fputil::MantissaWidth::VALUE + 2) { + return fputil::abs(x) + fputil::abs(y); + } + + double xd = static_cast(x); + double yd = static_cast(y); + + // These squares are exact. + double x_sq = xd * xd; + double y_sq = yd * yd; + + // Compute the sum of squares. + double sum_sq = x_sq + y_sq; + + // Compute the rounding error with Fast2Sum algorithm: + // x_sq + y_sq = sum_sq - err + double err = (x_sq >= y_sq) ? (sum_sq - x_sq) - y_sq : (sum_sq - y_sq) - x_sq; + + // Take sqrt in double precision. + DoubleBits result(fputil::sqrt(sum_sq)); + + if (!DoubleBits(sum_sq).is_inf_or_nan()) { + // Correct rounding. + double r_sq = static_cast(result) * static_cast(result); + double diff = sum_sq - r_sq; + constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL; + uint64_t lrs = result.uintval() & mask; + + if (lrs == 0x0000'0000'1000'0000ULL && err < diff) { + result.bits |= 1ULL; + } else if (lrs == 0x0000'0000'3000'0000ULL && err > diff) { + result.bits -= 1ULL; + } + } else { + FPBits bits_x(x), bits_y(y); + if (bits_x.is_inf_or_nan() || bits_y.is_inf_or_nan()) { + if (bits_x.is_inf() || bits_y.is_inf()) + return static_cast(FPBits::inf()); + if (bits_x.is_nan()) + return x; + return y; + } + } + + return static_cast(static_cast(result)); } } // namespace __llvm_libc diff --git a/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h b/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h --- a/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h +++ b/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h @@ -78,6 +78,11 @@ log << "\n Performance tests with inputs in normal range:\n"; run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL, /* endingBit= */ FPBits::MAX_NORMAL, 100'000'001, log); + log << "\n Performance tests with inputs in normal range with exponents " + "close to each other:\n"; + run_perf_in_range( + myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(), + /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 10'000'001, log); } }; diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -112,3 +112,22 @@ LINK_OPTIONS -lpthread ) + +add_fp_unittest( + hypotf_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + hypotf_test.cpp + DEPENDS + .exhaustive_test + libc.include.math + libc.src.math.hypotf + libc.src.__support.FPUtil.fputil + COMPILE_OPTIONS + -O3 + LINK_OPTIONS + -lpthread +) diff --git a/libc/test/src/math/exhaustive/hypotf_test.cpp b/libc/test/src/math/exhaustive/hypotf_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/math/exhaustive/hypotf_test.cpp @@ -0,0 +1,65 @@ +//===-- Exhaustive test for hypotf ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exhaustive_test.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/Hypot.h" +#include "src/math/hypotf.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/FPMatcher.h" + +using FPBits = __llvm_libc::fputil::FPBits; + +namespace mpfr = __llvm_libc::testing::mpfr; + +struct LlvmLibcHypotfExhaustiveTest : public LlvmLibcExhaustiveTest { + void check(uint32_t start, uint32_t stop, + mpfr::RoundingMode rounding) override { + // Range of the second input: [2^37, 2^48). + constexpr uint32_t Y_START = (37U + 127U) << 23; + constexpr uint32_t Y_STOP = (48U + 127U) << 23; + + mpfr::ForceRoundingMode r(rounding); + uint32_t xbits = start; + do { + float x = float(FPBits(xbits)); + uint32_t ybits = Y_START; + do { + float y = float(FPBits(ybits)); + EXPECT_FP_EQ(__llvm_libc::fputil::hypot(x, y), + __llvm_libc::hypotf(x, y)); + // Using MPFR will be much slower. + // mpfr::BinaryInput input{x, y}; + // EXPECT_MPFR_MATCH(mpfr::Operation::Hypot, input, + // __llvm_libc::hypotf(x, y), 0.5, + // rounding); + } while (ybits++ < Y_STOP); + } while (xbits++ < stop); + } +}; + +// Range of the first input: [2^23, 2^24); +static constexpr uint32_t START = (23U + 127U) << 23; +static constexpr uint32_t STOP = ((23U + 127U) << 23) + 1; +static constexpr int NUM_THREADS = 1; + +TEST_F(LlvmLibcHypotfExhaustiveTest, RoundNearestTieToEven) { + test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Nearest); +} + +TEST_F(LlvmLibcHypotfExhaustiveTest, RoundUp) { + test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Upward); +} + +TEST_F(LlvmLibcHypotfExhaustiveTest, RoundDown) { + test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Downward); +} + +TEST_F(LlvmLibcHypotfExhaustiveTest, RoundTowardZero) { + test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero); +} diff --git a/libc/test/src/math/hypotf_hard_to_round.h b/libc/test/src/math/hypotf_hard_to_round.h --- a/libc/test/src/math/hypotf_hard_to_round.h +++ b/libc/test/src/math/hypotf_hard_to_round.h @@ -13,8 +13,9 @@ namespace mpfr = __llvm_libc::testing::mpfr; -constexpr int N_HARD_TO_ROUND = 1216; +constexpr int N_HARD_TO_ROUND = 1217; constexpr mpfr::BinaryInput HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = { + {0x1.faf49ep+25f, 0x1.480002p+23f}, {0x1.ffffecp-1f, 0x1.000002p+27}, {0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */ {0x1.05555p+34, 0x1.bffffep+23}, /* 44 identical bits */