diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -182,6 +182,114 @@ return true; } +#if !defined(LONG_DOUBLE_IS_DOUBLE) +template <> +inline bool eisel_lemire( + typename fputil::FPBits::UIntType mantissa, int32_t exp10, + typename fputil::FPBits::UIntType *outputMantissa, + uint32_t *outputExp2) { + using BitsType = typename fputil::FPBits::UIntType; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; + + // Exp10 Range + // This doesn't reach very far into the range for long doubles, since it's + // sized for doubles and their 11 exponent bits, and not for long doubles and + // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long + // double). This is a known tradeoff, and was made because a proper long + // double table would be approximately 16 times larger. This would have + // significant memory and storage costs all the time to speed up a relatively + // uncommon path. In addition the exp10_to_exp2 function only approximates + // multiplying by log(10)/log(2), and that approximation may not be accurate + // out to the full long double range. + if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || + exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { + return false; + } + + // Normalization + uint32_t clz = leading_zeroes(mantissa); + mantissa <<= clz; + + uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + + fputil::FloatProperties::EXPONENT_BIAS - clz; + + // Multiplication + const uint64_t *power_of_ten = + DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; + + // Since the input mantissa is more than 64 bits, we have to multiply with the + // full 128 bits of the power of ten to get an approximation with the same + // number of significant bits. This means that we only get the one + // approximation, and that approximation is 256 bits long. + __uint128_t approx_upper = static_cast<__uint128_t>(high64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[1]); + + __uint128_t approx_middle = static_cast<__uint128_t>(high64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[0]) + + static_cast<__uint128_t>(low64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[1]); + + __uint128_t approx_lower = static_cast<__uint128_t>(low64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[0]); + + __uint128_t final_approx_lower = + approx_lower + (static_cast<__uint128_t>(low64(approx_middle)) << 64); + __uint128_t final_approx_upper = approx_upper + high64(approx_middle) + + (final_approx_lower < approx_lower ? 1 : 0); + + // The halfway constant is used to check if the bits that will be shifted away + // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 + // (final mantissa size) - 3 (we shift away the last two bits separately for + // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, + // it's 12 bits for 128 bit floats in this case. + constexpr __uint128_t HALFWAY_CONSTANT = + (__uint128_t(1) << (BITS_IN_MANTISSA - + fputil::FloatProperties::MANTISSA_WIDTH - + 3)) - + 1; + + if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && + final_approx_lower + mantissa < mantissa) { + return false; + } + + // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats + BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1); + BitsType final_mantissa = + final_approx_upper >> + (msb + BITS_IN_MANTISSA - + (fputil::FloatProperties::MANTISSA_WIDTH + 3)); + exp2 -= 1 ^ msb; // same as !msb + + // Half-way ambiguity + if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && + (final_mantissa & 3) == 1) { + return false; + } + + // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit + // floats + final_mantissa += final_mantissa & 1; + final_mantissa >>= 1; + if ((final_mantissa >> + (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { + final_mantissa >>= 1; + ++exp2; + } + + // The if block is equivalent to (but has fewer branches than): + // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } + if (exp2 - 1 >= + (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { + return false; + } + + *outputMantissa = final_mantissa; + *outputExp2 = exp2; + return true; +} +#endif + // The nth item in POWERS_OF_TWO represents the greatest power of two less than // 10^n. This tells us how much we can safely shift without overshooting. constexpr uint8_t POWERS_OF_TWO[19] = { diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -175,13 +175,8 @@ // Check the fallback states for the algorithm: uint32_t float_output_mantissa = 0; uint64_t double_output_mantissa = 0; - __uint128_t too_long_mantissa = 0; uint32_t output_exp2 = 0; - // This Eisel-Lemire implementation doesn't support long doubles yet. - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - too_long_mantissa, 0, &too_long_mantissa, &output_exp2)); - // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away // from both of its closest floating point approximations // (12345678901234548736 and 12345678901234550784) @@ -272,3 +267,92 @@ // EXPECT_EQ(outputExp2, uint32_t(1089)); // EXPECT_EQ(errno, 0); } + +#if defined(LONG_DOUBLE_IS_DOUBLE) +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64AsLongDouble) { + eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); +} +#elif defined(SPECIAL_X86_LONG_DOUBLE) +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Simple) { + eisel_lemire_test(123, 0, 0xf600000000000000, 16389); + eisel_lemire_test(12345678901234568192u, 0, 0xab54a98ceb1f0c00, + 16446); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80LongerMantissa) { + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + 0, 0x91a2b3c091a2b3c1, 16507); + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + 300, 0xd97757de56adb65c, 17503); + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + -300, 0xc30feb9a7618457d, 15510); +} + +// These tests check numbers at the edge of the DETAILED_POWERS_OF_TEN table. +// This doesn't reach very far into the range for long doubles, since it's sized +// for doubles and their 11 exponent bits, and not for long doubles and their +// 15 exponent bits. This is a known tradeoff, and was made because a proper +// long double table would be approximately 16 times longer (specifically the +// maximum exponent would need to be about 5000, leading to a 10,000 entry +// table). This would have significant memory and storage costs all the time to +// speed up a relatively uncommon path. +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80TableLimits) { + eisel_lemire_test(1, 347, 0xd13eb46469447567, 17535); + eisel_lemire_test(1, -348, 0xfa8fd5a0081c0288, 15226); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Fallback) { + uint32_t outputExp2 = 0; + __uint128_t quadOutputMantissa = 0; + + // This number is halfway between two possible results, and the algorithm + // can't determine which is correct. + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 12345678901234567890u, 1, &quadOutputMantissa, &outputExp2)); + + // These numbers' exponents are out of range for the current powers of ten + // table. + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 1, 1000, &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 1, -1000, &quadOutputMantissa, &outputExp2)); +} +#else +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Simple) { + eisel_lemire_test(123, 0, (__uint128_t(0x1ec0000000000) << 64), + 16389); + eisel_lemire_test(12345678901234568192u, 0, + (__uint128_t(0x156a95319d63e) << 64) + + __uint128_t(0x1800000000000000), + 16446); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128LongerMantissa) { + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + 0, (__uint128_t(0x1234567812345) << 64) + __uint128_t(0x6781234567812345), + 16507); + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + 300, + (__uint128_t(0x1b2eeafbcad5b) << 64) + __uint128_t(0x6cb8b4451dfcde19), + 17503); + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + -300, + (__uint128_t(0x1861fd734ec30) << 64) + __uint128_t(0x8afa7189f0f7595f), + 15510); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Fallback) { + uint32_t outputExp2 = 0; + __uint128_t quadOutputMantissa = 0; + + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + (__uint128_t(0x5ce0e9a56015fec5) << 64) + __uint128_t(0xaadfa328ae39b333), + 1, &quadOutputMantissa, &outputExp2)); +} +#endif