diff --git a/libc/src/__support/FPUtil/nearest_integer.h b/libc/src/__support/FPUtil/nearest_integer.h --- a/libc/src/__support/FPUtil/nearest_integer.h +++ b/libc/src/__support/FPUtil/nearest_integer.h @@ -44,6 +44,22 @@ return x; } +static inline float nearest_integer(float x) { + if (x < 0x1p24f && x > -0x1p24f) { + float r = x < 0 ? (x - 0x1.0p23f) + 0x1.0p23f : (x + 0x1.0p23f) - 0x1.0p23f; + float diff = x - r; + // The expression above is correct for the default rounding mode, round-to- + // nearest, tie-to-even. For other rounding modes, it might be off by 1, + // which is corrected below. + if (unlikely(diff > 0.5f)) + return r + 1.0f; + if (unlikely(diff < -0.5)) + return r - 1.0f; + return r; + } + return x; +} + } // namespace fputil } // namespace __llvm_libc diff --git a/libc/src/__support/FPUtil/x86_64/nearest_integer.h b/libc/src/__support/FPUtil/x86_64/nearest_integer.h --- a/libc/src/__support/FPUtil/x86_64/nearest_integer.h +++ b/libc/src/__support/FPUtil/x86_64/nearest_integer.h @@ -31,6 +31,13 @@ return ymm[0]; } +static inline float nearest_integer(float x) { + __m128 xmm = _mm_set_ss(x); // NOLINT + __m128 ymm = + _mm_round_ss(xmm, xmm, _MM_ROUND_NEAREST | _MM_FROUND_NO_EXC); // NOLINT + return ymm[0]; +} + } // namespace fputil } // namespace __llvm_libc diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -478,6 +478,8 @@ DEPENDS .common_constants libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.include.math COMPILE_OPTIONS @@ -492,6 +494,8 @@ ../exp2f.h DEPENDS libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.include.math COMPILE_OPTIONS @@ -508,6 +512,7 @@ .common_constants libc.src.__support.FPUtil.fputil libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.include.math COMPILE_OPTIONS diff --git a/libc/src/math/generic/exp2f.cpp b/libc/src/math/generic/exp2f.cpp --- a/libc/src/math/generic/exp2f.cpp +++ b/libc/src/math/generic/exp2f.cpp @@ -9,9 +9,10 @@ #include "src/math/exp2f.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include @@ -55,34 +56,35 @@ uint32_t x_abs = x_u & 0x7fff'ffffU; // Exceptional values. - switch (x_u) { - case 0x3b42'9d37U: // x = 0x1.853a6ep-9f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.00870ap+0f; - break; - case 0x3c02'a9adU: // x = 0x1.05535ap-7f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.016b46p+0f; - break; - case 0x3ca6'6e26U: { // x = 0x1.4cdc4cp-6f - int round_mode = fputil::get_round(); - if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) - return 0x1.03a16ap+0f; - return 0x1.03a168p+0f; - } - case 0x3d92'a282U: // x = 0x1.254504p-4f - if (fputil::get_round() == FE_UPWARD) - return 0x1.0d0688p+0f; - return 0x1.0d0686p+0f; - case 0xbcf3'a937U: // x = -0x1.e7526ep-6f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.f58d62p-1f; - break; - case 0xb8d3'd026U: // x = -0x1.a7a04cp-14f - if (fputil::get_round() == FE_TONEAREST) - return 0x1.fff6d2p-1f; - break; - } + // Wait for https://reviews.llvm.org/D129918 + // switch (x_u) { + // case 0x3b42'9d37U: // x = 0x1.853a6ep-9f + // if (fputil::get_round() == FE_TONEAREST) + // return 0x1.00870ap+0f; + // break; + // case 0x3c02'a9adU: // x = 0x1.05535ap-7f + // if (fputil::get_round() == FE_TONEAREST) + // return 0x1.016b46p+0f; + // break; + // case 0x3ca6'6e26U: { // x = 0x1.4cdc4cp-6f + // int round_mode = fputil::get_round(); + // if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) + // return 0x1.03a16ap+0f; + // return 0x1.03a168p+0f; + // } + // case 0x3d92'a282U: // x = 0x1.254504p-4f + // if (fputil::get_round() == FE_UPWARD) + // return 0x1.0d0688p+0f; + // return 0x1.0d0686p+0f; + // case 0xbcf3'a937U: // x = -0x1.e7526ep-6f + // if (fputil::get_round() == FE_TONEAREST) + // return 0x1.f58d62p-1f; + // break; + // case 0xb8d3'd026U: // x = -0x1.a7a04cp-14f + // if (fputil::get_round() == FE_TONEAREST) + // return 0x1.fff6d2p-1f; + // break; + // } // // When |x| >= 128, |x| < 2^-25, or x is nan if (unlikely(x_abs >= 0x4300'0000U || x_abs <= 0x3280'0000U)) { @@ -136,8 +138,8 @@ // The default rounding mode for float-to-int conversion in C++ is // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 // before conversion. - int x_hi = - static_cast(x * 0x1.0p+6f + (xbits.get_sign() ? -0.5f : 0.5f)); + float kf = fputil::nearest_integer(x * 0x1.0p+6f); + int x_hi = static_cast(kf); // For 2-complement integers, arithmetic right shift is the same as dividing // by a power of 2 and then round down (toward negative infinity). int e_hi = (x_hi >> 6) + @@ -148,8 +150,7 @@ // mid = x_hi & 0x0000'003fU; double exp_hi_mid = static_cast(exp_hi) * EXP_M[x_hi & 0x3f]; // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-6f; - double xd = static_cast(x); + double xd = fputil::multiply_add(kf, -0x1.0p-6f, x); // Degree-4 minimax polynomial generated by Sollya with the following // commands: // > display = hexadecimal; diff --git a/libc/src/math/generic/expf.cpp b/libc/src/math/generic/expf.cpp --- a/libc/src/math/generic/expf.cpp +++ b/libc/src/math/generic/expf.cpp @@ -10,9 +10,10 @@ #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include @@ -83,10 +84,10 @@ // The default rounding mode for float-to-int conversion in C++ is // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 // before conversion. - int x_hi = static_cast(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); + float kf = fputil::nearest_integer(x * 0x1.0p7f); + double xd = static_cast(fputil::multiply_add(kf, -0x1.0p-7f, x)); + int x_hi = static_cast(kf); // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-7f; - double xd = static_cast(x); x_hi += 104 << 7; // hi = x_hi >> 7 double exp_hi = EXP_M1[x_hi >> 7]; diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -10,9 +10,10 @@ #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include @@ -133,10 +134,10 @@ // generated by Sollya. // x_hi = hi + mid. - int x_hi = static_cast(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); + float kf = fputil::nearest_integer(x * 0x1.0p7f); + int x_hi = static_cast(kf); // Subtract (hi + mid) from x to get lo. - x -= static_cast(x_hi) * 0x1.0p-7f; - double xd = static_cast(x); + double xd = static_cast(fputil::multiply_add(kf, -0x1.0p-7f, x)); x_hi += 104 << 7; // hi = x_hi >> 7 double exp_hi = EXP_M1[x_hi >> 7];