diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -34,6 +34,15 @@ return 0x1.8dbe62p-3f; } +#if !defined(LIBC_TARGET_HAS_FMA) + if (unlikely(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f + int round_mode = fputil::get_round(); + if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD) + return -0x1.71c884p-4f; + return -0x1.71c882p-4f; + } +#endif // LIBC_TARGET_HAS_FMA + // When |x| > 25*log(2), or nan if (unlikely(x_abs >= 0x418a'a123U)) { // x < log(2^-25) @@ -70,19 +79,30 @@ // x = -0.0f if (unlikely(xbits.uintval() == 0x8000'0000U)) return x; - // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x - // is: - // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x| - // = |x| - // < 2^-25 - // < epsilon(1)/2. - // So the correctly rounded values of expm1(x) are: - // = x + eps(x) if rounding mode = FE_UPWARD, - // or (rounding mode = FE_TOWARDZERO and x is negative), - // = x otherwise. - // To simplify the rounding decision and make it more efficient, we use - // fma(x, x, x) ~ x + x^2 instead. - return fputil::multiply_add(x, x, x); + // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x + // is: + // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x| + // = |x| + // < 2^-25 + // < epsilon(1)/2. + // So the correctly rounded values of expm1(x) are: + // = x + eps(x) if rounding mode = FE_UPWARD, + // or (rounding mode = FE_TOWARDZERO and x is + // negative), + // = x otherwise. + // To simplify the rounding decision and make it more efficient, we use + // fma(x, x, x) ~ x + x^2 instead. + // Note: to use the formula x + x^2 to decide the correct rounding, we + // do need fma(x, x, x) to prevent underflow caused by x*x when |x| < + // 2^-76. For targets without FMA instructions, we simply use double for + // intermediate results as it is more efficient than using an emulated + // version of FMA. +#if defined(LIBC_TARGET_HAS_FMA) + return fputil::fma(x, x, x); +#else + double xd = x; + return static_cast(fputil::multiply_add(xd, xd, xd)); +#endif // LIBC_TARGET_HAS_FMA } // 2^-25 <= |x| < 2^-4 diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1201,8 +1201,6 @@ libc.include.math libc.src.math.expm1f libc.src.__support.FPUtil.fputil - FLAGS - FMA_OPT__ONLY ) add_fp_unittest( diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -92,7 +92,6 @@ DEPENDS .exhaustive_test libc.include.math - libc.src.math.expf libc.src.math.expm1f libc.src.__support.FPUtil.fputil LINK_LIBRARIES diff --git a/libc/test/src/math/exhaustive/expm1f_test.cpp b/libc/test/src/math/exhaustive/expm1f_test.cpp --- a/libc/test/src/math/exhaustive/expm1f_test.cpp +++ b/libc/test/src/math/exhaustive/expm1f_test.cpp @@ -18,7 +18,7 @@ namespace mpfr = __llvm_libc::testing::mpfr; -struct LlvmLibcExpfExhaustiveTest : public LlvmLibcExhaustiveTest { +struct LlvmLibcExpm1fExhaustiveTest : public LlvmLibcExhaustiveTest { bool check(uint32_t start, uint32_t stop, mpfr::RoundingMode rounding) override { mpfr::ForceRoundingMode r(rounding); @@ -40,21 +40,21 @@ static constexpr uint32_t POS_START = 0x0000'0000U; static constexpr uint32_t POS_STOP = 0x42b2'0000U; -TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundNearestTieToEven) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundNearestTieToEven) { test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Nearest); } -TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundUp) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundUp) { test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Upward); } -TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundDown) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundDown) { test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::Downward); } -TEST_F(LlvmLibcExpfExhaustiveTest, PostiveRangeRoundTowardZero) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, PostiveRangeRoundTowardZero) { test_full_range(POS_START, POS_STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero); } @@ -63,21 +63,21 @@ static constexpr uint32_t NEG_START = 0x8000'0000U; static constexpr uint32_t NEG_STOP = 0xc2d0'0000U; -TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundNearestTieToEven) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundNearestTieToEven) { test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Nearest); } -TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundUp) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundUp) { test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Upward); } -TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundDown) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundDown) { test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::Downward); } -TEST_F(LlvmLibcExpfExhaustiveTest, NegativeRangeRoundTowardZero) { +TEST_F(LlvmLibcExpm1fExhaustiveTest, NegativeRangeRoundTowardZero) { test_full_range(NEG_START, NEG_STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero); } diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp --- a/libc/test/src/math/expm1f_test.cpp +++ b/libc/test/src/math/expm1f_test.cpp @@ -97,6 +97,16 @@ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, __llvm_libc::expm1f(x), 0.5); EXPECT_MATH_ERRNO(0); + + x = float(FPBits(0x942ed494U)); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, + __llvm_libc::expm1f(x), 0.5); + EXPECT_MATH_ERRNO(0); + + x = float(FPBits(0xbdc1c6cbU)); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Expm1, x, + __llvm_libc::expm1f(x), 0.5); + EXPECT_MATH_ERRNO(0); } TEST(LlvmLibcExpm1fTest, InFloatRange) {