diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -71,6 +71,7 @@ libc.src.stdlib.strtod libc.src.stdlib.strtof libc.src.stdlib.strtol + libc.src.stdlib.strtold libc.src.stdlib.strtoll libc.src.stdlib.strtoul libc.src.stdlib.strtoull diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -516,8 +516,9 @@ FunctionSpec<"qsort", RetValSpec, [ArgSpec, ArgSpec, ArgSpec, ArgSpec]>, - FunctionSpec<"strtod", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"strtof", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"strtod", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"strtold", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"strtol", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, FunctionSpec<"strtoll", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, FunctionSpec<"strtoul", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -67,6 +67,17 @@ return static_cast(num >> 64); } +template inline void set_implicit_bit(fputil::FPBits &result) { + return; +} + +#if defined(SPECIAL_X86_LONG_DOUBLE) +template <> +inline void set_implicit_bit(fputil::FPBits &result) { + result.set_implicit_bit(result.get_unbiased_exponent() != 0); +} +#endif + // This Eisel-Lemire implementation is based on the algorithm described in the // paper Number Parsing at a Gigabyte per Second, Software: Practice and // Experience 51 (8), 2021 (https://arxiv.org/abs/2101.11408), as well as the @@ -114,9 +125,12 @@ // The halfway constant is used to check if the bits that will be shifted away // intially are all 1. 
For doubles this is 64 (bitstype size) - 52 (final // mantissa size) - 3 (we shift away the last two bits separately for - // accuracy, and the most significant bit is ignored.) = 9. Similarly, it's 6 - // for floats in this case. - const uint64_t halfway_constant = sizeof(T) == 8 ? 0x1FF : 0x3F; + // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly, + // it's 6 bits for floats in this case. + const uint64_t halfway_constant = + (uint64_t(1) << (BITS_IN_MANTISSA - + fputil::FloatProperties::MANTISSA_WIDTH - 3)) - + 1; if ((high64(first_approx) & halfway_constant) == halfway_constant && low64(first_approx) + mantissa < mantissa) { __uint128_t low_bits = static_cast<__uint128_t>(mantissa) * @@ -289,6 +303,13 @@ << fputil::FloatProperties::MANTISSA_WIDTH) { final_mantissa >>= 1; ++exp2; + + // Check if this rounding causes exp2 to go out of range and make the result + // INF. If this is the case, then final_mantissa and exp2 are already the + // correct values for an INF result. 
+ if (exp2 >= fputil::FPBits::MAX_EXPONENT) { + errno = ERANGE; // NOLINT + } } if (exp2 == 0) { @@ -328,6 +349,45 @@ static constexpr double MAX_EXACT_INT = 9007199254740991.0; }; +#if defined(LONG_DOUBLE_IS_DOUBLE) +template <> class ClingerConsts { +public: + static constexpr auto &POWERS_OF_TEN_ARRAY = + ClingerConsts<double>::POWERS_OF_TEN_ARRAY; // alias, not an array copy + static constexpr int32_t EXACT_POWERS_OF_TEN = + ClingerConsts::EXACT_POWERS_OF_TEN; + static constexpr int32_t DIGITS_IN_MANTISSA = + ClingerConsts::DIGITS_IN_MANTISSA; + static constexpr long double MAX_EXACT_INT = + ClingerConsts::MAX_EXACT_INT; +}; +#elif defined(SPECIAL_X86_LONG_DOUBLE) +template <> class ClingerConsts { +public: + static constexpr long double POWERS_OF_TEN_ARRAY[] = { + 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, + 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, + 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L}; + static constexpr int32_t EXACT_POWERS_OF_TEN = 27; + static constexpr int32_t DIGITS_IN_MANTISSA = 21; + static constexpr long double MAX_EXACT_INT = 18446744073709551615.0L; +}; +#else +template <> class ClingerConsts { +public: + static constexpr long double POWERS_OF_TEN_ARRAY[] = { + 1e0L, 1e1L, 1e2L, 1e3L, 1e4L, 1e5L, 1e6L, 1e7L, 1e8L, 1e9L, + 1e10L, 1e11L, 1e12L, 1e13L, 1e14L, 1e15L, 1e16L, 1e17L, 1e18L, 1e19L, + 1e20L, 1e21L, 1e22L, 1e23L, 1e24L, 1e25L, 1e26L, 1e27L, 1e28L, 1e29L, + 1e30L, 1e31L, 1e32L, 1e33L, 1e34L, 1e35L, 1e36L, 1e37L, 1e38L, 1e39L, + 1e40L, 1e41L, 1e42L, 1e43L, 1e44L, 1e45L, 1e46L, 1e47L, 1e48L}; + static constexpr int32_t EXACT_POWERS_OF_TEN = 48; + static constexpr int32_t DIGITS_IN_MANTISSA = 33; + static constexpr long double MAX_EXACT_INT = + 10384593717069655257060992658440191.0L; +}; +#endif + // Take an exact mantissa and exponent and attempt to convert it using only // exact floating point arithmetic. This only handles numbers with low // exponents, but handles them quickly. 
This is an implementation of Clinger's @@ -837,6 +897,10 @@ if (strEnd != nullptr) *strEnd = const_cast(src); + // This function only does something if T is long double and the platform uses + // special 80 bit long doubles. Otherwise it should be inlined out. + set_implicit_bit(result); + return T(result); } diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -58,6 +58,16 @@ libc.src.__support.str_to_float ) +add_entrypoint_object( + strtold + SRCS + strtold.cpp + HDRS + strtold.h + DEPENDS + libc.src.__support.str_to_float +) + add_entrypoint_object( strtol SRCS diff --git a/libc/src/stdlib/strtold.h b/libc/src/stdlib/strtold.h new file mode 100644 --- /dev/null +++ b/libc/src/stdlib/strtold.h @@ -0,0 +1,18 @@ +//===-- Implementation header for strtold -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDLIB_STRTOLD_H +#define LLVM_LIBC_SRC_STDLIB_STRTOLD_H + +namespace __llvm_libc { + +long double strtold(const char *__restrict str, char **__restrict str_end); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDLIB_STRTOLD_H diff --git a/libc/src/stdlib/strtold.cpp b/libc/src/stdlib/strtold.cpp new file mode 100644 --- /dev/null +++ b/libc/src/stdlib/strtold.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of strtold -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/strtold.h" +#include "src/__support/common.h" +#include "src/__support/str_to_float.h" + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(long double, strtold, + (const char *__restrict str, char **__restrict str_end)) { + return internal::strtofloatingpoint(str, str_end); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -70,6 +70,16 @@ libc.src.stdlib.strtol ) +add_libc_unittest( + strtold_test + SUITE + libc_stdlib_unittests + SRCS + strtold_test.cpp + DEPENDS + libc.src.stdlib.strtold +) + add_libc_unittest( strtoll_test SUITE diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -0,0 +1,221 @@ +//===-- Unittests for strtold ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/FPUtil/FPBits.h" +#include "src/stdlib/strtold.h" + +#include "utils/UnitTest/Test.h" + +#include +#include +#include + +class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { +public: + void runTest(const char *inputString, const ptrdiff_t expectedStrLen, + const uint64_t expectedRawData64, + const __uint128_t expectedRawData80, + const __uint128_t expectedRawData128, + const int expectedErrno64 = 0, const int expectedErrno80 = 0, + const int expectedErrno128 = 0) { + // expectedRawData64 is the expected long double result as a uint64_t, + // organized according to the IEEE754 double precision format: + // + // +-- 1 Sign Bit +-- 52 Mantissa bits + // | | + // | +-------------------------+------------------------+ + // | | | + // SEEEEEEEEEEEMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM + // | | + // +----+----+ + // | + // +-- 11 Exponent Bits + + // expectedRawData80 is the expected long double result as a __uint128_t, + // organized according to the x86 extended precision format: + // + // +-- 1 Sign Bit + // | + // | +-- 1 Integer part bit (1 unless this is a subnormal) + // | | + // SEEEEEEEEEEEEEEEIMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM...M + // | | | | + // +------+------+ +---------------------------+--------------------------+ + // | | + // +-- 15 Exponent Bits +-- 63 Mantissa bits + + // expectedRawData128 is the expected long double result as a __uint128_t, + // organized according to IEEE754 quadruple precision format: + // + // +-- 1 Sign Bit +-- 112 Mantissa bits + // | | + // | +----------------------------+--------------------------+ + // | | | + // SEEEEEEEEEEEEEEEMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM...M + // | | + // +------+------+ + // | + // +-- 15 Exponent Bits + char *strEnd = nullptr; + +#if defined(LONG_DOUBLE_IS_DOUBLE) + 
__llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits(expectedRawData64); + const int expectedErrno = expectedErrno64; +#elif defined(SPECIAL_X86_LONG_DOUBLE) + __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits(expectedRawData80); + const int expectedErrno = expectedErrno80; +#else + __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits(expectedRawData128); + const int expectedErrno = expectedErrno128; +#endif + + errno = 0; + long double result = __llvm_libc::strtold(inputString, &strEnd); + + __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits(); + actualFP = __llvm_libc::fputil::FPBits(result); + + EXPECT_EQ(strEnd - inputString, expectedStrLen); + + EXPECT_EQ(actualFP.bits, expectedFP.bits); + EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); + EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); + EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(errno, expectedErrno); + } +}; + +TEST_F(LlvmLibcStrToLDTest, SimpleTest) { + runTest("123", 3, uint64_t(0x405ec00000000000), + __uint128_t(0x4005f60000) << 40, + __uint128_t(0x4005ec0000000000) << 64); + + // This should fail on Eisel-Lemire, forcing a fallback to simple decimal + // conversion. 
+ runTest("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8), + (__uint128_t(0x403eab54a9) << 40) + __uint128_t(0x8ceb1ec400), + (__uint128_t(0x403e56a95319d63d) << 64) + + __uint128_t(0x8800000000000000)); + + // Found while looking for difficult test cases here: + // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt + runTest("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb), + (__uint128_t(0x4062dc3bcf) << 40) + __uint128_t(0x923a6fd402), + (__uint128_t(0x4062b8779f2474df) << 64) + + __uint128_t(0xa804bfd8c6d5c000)); + + runTest("0x123", 5, uint64_t(0x4072300000000000), + (__uint128_t(0x4007918000) << 40), + (__uint128_t(0x4007230000000000) << 64)); +} + +// These are tests that have caused problems for doubles in the past. +TEST_F(LlvmLibcStrToLDTest, Float64SpecificFailures) { + runTest("3E70000000000000", 16, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64), ERANGE, ERANGE, ERANGE); + runTest("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9), + (__uint128_t(0x3fadddd953) << 40) + __uint128_t(0x464e85c400), + (__uint128_t(0x3fadbbb2a68c9d0b) << 64) + + __uint128_t(0x8800e7969e1c5fc8)); + runTest( + "2.16656806400000023841857910156251e9", 36, uint64_t(0x41e0246690000001), + (__uint128_t(0x401e812334) << 40) + __uint128_t(0x8000000400), + (__uint128_t(0x401e024669000000) << 64) + __uint128_t(0x800000000000018)); + runTest("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9), + (__uint128_t(0x403fc1f099) << 40) + __uint128_t(0x5e30464402), + (__uint128_t(0x403f83e132bc608c) << 64) + + __uint128_t(0x8803000000000000)); +} + +TEST_F(LlvmLibcStrToLDTest, MaxSizeNumbers) { + runTest("1.1897314953572317650e4932", 26, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7ffeffffff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7ffeffffffffffff) << 64) + + __uint128_t(0xfffd57322e3f8675), + ERANGE, 0, 0); + 
runTest("1.18973149535723176508e4932", 27, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7ffeffffffffffff) << 64) + + __uint128_t(0xffffd2478338036c), + ERANGE, ERANGE, 0); +} + +// These tests check subnormal behavior for 80 bit and 128 bit floats. They will +// be too small for 64 bit floats. +TEST_F(LlvmLibcStrToLDTest, SubnormalTests) { + runTest("1e-4950", 7, uint64_t(0), (__uint128_t(0x00000000000000000003)), + (__uint128_t(0x000000000000000000057c9647e1a018)), ERANGE, ERANGE, + ERANGE); + runTest("1.89e-4951", 10, uint64_t(0), (__uint128_t(0x00000000000000000001)), + (__uint128_t(0x0000000000000000000109778a006738)), ERANGE, ERANGE, + ERANGE); + runTest("4e-4966", 7, uint64_t(0), (__uint128_t(0)), + (__uint128_t(0x00000000000000000000000000000001)), ERANGE, ERANGE, + ERANGE); +} + +TEST_F(LlvmLibcStrToLDTest, SmallNormalTests) { + runTest("3.37e-4932", 10, uint64_t(0), + (__uint128_t(0x1804cf7) << 40) + __uint128_t(0x908850712), + (__uint128_t(0x10099ee12110a) << 64) + __uint128_t(0xe24b75c0f50dc0c), + ERANGE, 0, 0); +} + +TEST_F(LlvmLibcStrToLDTest, ComplexHexadecimalTests) { + runTest("0x1p16383", 9, 0x7ff0000000000000, (__uint128_t(0x7ffe800000) << 40), + (__uint128_t(0x7ffe000000000000) << 64), ERANGE); + runTest("0x123456789abcdef", 17, 0x43723456789abcdf, + (__uint128_t(0x403791a2b3) << 40) + __uint128_t(0xc4d5e6f780), + (__uint128_t(0x403723456789abcd) << 64) + + __uint128_t(0xef00000000000000)); + runTest("0x123456789abcdef0123456789ABCDEF", 33, 0x47723456789abcdf, + (__uint128_t(0x407791a2b3) << 40) + __uint128_t(0xc4d5e6f781), + (__uint128_t(0x407723456789abcd) << 64) + + __uint128_t(0xef0123456789abce)); +} + +TEST_F(LlvmLibcStrToLDTest, InfTests) { + runTest("INF", 3, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64)); + runTest("INFinity", 8, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64)); + runTest("-inf", 4, 
0xfff0000000000000, (__uint128_t(0xffff800000) << 40), + (__uint128_t(0xffff000000000000) << 64)); +} + +TEST_F(LlvmLibcStrToLDTest, NaNTests) { + runTest("NaN", 3, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); + runTest("-nAn", 4, 0xfff8000000000000, (__uint128_t(0xffffc00000) << 40), + (__uint128_t(0xffff800000000000) << 64)); + runTest("NaN()", 5, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); + runTest("NaN(1234)", 9, 0x7ff80000000004d2, + (__uint128_t(0x7fffc00000) << 40) + __uint128_t(0x4d2), + (__uint128_t(0x7fff800000000000) << 64) + __uint128_t(0x4d2)); + runTest("NaN(0xffffffffffff)", 19, 0x7ff8ffffffffffff, + (__uint128_t(0x7fffc000ff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xffffffffffff)); + runTest("NaN(0xfffffffffffff)", 20, 0x7fffffffffffffff, + (__uint128_t(0x7fffc00fff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xfffffffffffff)); + runTest("NaN(0xffffffffffffffff)", 23, 0x7fffffffffffffff, + (__uint128_t(0x7fffffffff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xffffffffffffffff)); + runTest("NaN( 1234)", 3, 0x7ff8000000000000, + (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); +}