diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -102,6 +102,7 @@ HDRS float_to_string.h ryu_constants.h + ryu_long_double_constants.h DEPENDS .libc_assert libc.src.__support.CPP.type_traits diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -52,14 +52,14 @@ normalize(); } - DyadicFloat(bool s, int e, MantissaType m) + constexpr DyadicFloat(bool s, int e, MantissaType m) : sign(s), exponent(e), mantissa(m) { normalize(); } // Normalizing the mantissa, bringing the leading 1 bit to the most // significant bit. - DyadicFloat &normalize() { + constexpr DyadicFloat &normalize() { if (!mantissa.is_zero()) { int shift_length = static_cast(mantissa.clz()); exponent -= shift_length; @@ -118,6 +118,24 @@ // Still correct without FMA instructions if `d_lo` is not underflow. return multiply_add(d_lo.get_val(), T(round_and_sticky), d_hi.get_val()); } + + explicit operator MantissaType() const { + if (mantissa.is_zero()) + return 0; + + MantissaType new_mant = mantissa; + if (exponent > 0) { + new_mant <<= exponent; + } else { + new_mant >>= (-exponent); + } + + if (sign) { + new_mant = (~new_mant) + 1; + } + + return new_mant; + } }; // Quick add - Add 2 dyadic floats with rounding toward 0 and then normalize the @@ -208,6 +226,30 @@ return result; } +// Simple exponentiation implementation for printf. Only handles positive +// exponents, since division isn't implemented. +template +constexpr DyadicFloat pow_n(DyadicFloat a, uint32_t power) { + DyadicFloat result = 1.0; + DyadicFloat cur_power = a; + + while (power > 0) { + if ((power % 2) > 0) { + result = quick_mul(result, cur_power); + } + power = power >> 1; + cur_power = quick_mul(cur_power, cur_power); + } + return result; +} + +template +constexpr DyadicFloat mul_pow_2(DyadicFloat a, int32_t pow_2) { + DyadicFloat result = a; + result.exponent += pow_2; + return result; +} + } // namespace __llvm_libc::fputil #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_DYADIC_FLOAT_H diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -13,10 +13,65 @@ #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/UInt.h" #include "src/__support/common.h" #include "src/__support/libc_assert.h" + +// This file has 5 compile-time flags to allow the user to configure the float +// to string behavior. These allow the user to select which 2 of the 3 useful +// properties they want. The useful properties are: +// 1) Speed of Evaluation +// 2) Small Size of Binary +// 3) Centered Output Value +// These are explained below with the flags that are missing each one. + +// LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE +// The Mega Table is ~5 megabytes when compiled. It lists the constants needed +// to perform the Ryu Printf algorithm (described below) for all long double +// values. This makes it extremely fast for both doubles and long doubles, in +// exchange for large binary size. + +// LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT +// LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD +// Dyadic floats are software floating point numbers, and their accuracy can be +// as high as necessary. This option uses 256 bit dyadic floats to calculate +// the table values that Ryu Printf needs. This is reasonably fast and very +// small compared to the Mega Table, but the 256 bit floats only give accurate +// results for the first ~50 digits of the output. In practice this shouldn't +// be a problem since long doubles are only accurate for ~35 digits, but the +// trailing values all being 0s may cause brittle tests to fail. The _LD +// version of this flag only effects the long double calculations, and the +// other version effects both long double and double. + +// LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC +// Integer Calculation uses wide integers to do the calculations for the Ryu +// Printf table, which is just as accurate as the Mega Table without requiring +// as much code size. These integers can be very large (~32KB at max, though +// always on the stack) to handle the edges of the long double range. They are +// also very slow, taking multiple seconds on a powerful CPU to calculate the +// values at the end of the range. If no flag is set, this is used for long +// doubles, the flag only changes the double behavior. + +// LIBC_COPT_FLOAT_TO_STR_NO_TABLE +// This flag doesn't change the actual calculation method, instead it is used +// to disable the normal Ryu Printf table for configurations that don't use any +// table at all. + +// Default Config: +// If no flags are set, doubles use the normal (and much more reasonably sized) +// Ryu Printf table and long doubles use Integer Calculation. This is because +// long doubles are rarely used and the normal Ryu Printf table is very fast +// for doubles. + +#ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE +#include "src/__support/ryu_long_double_constants.h" +#elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE) #include "src/__support/ryu_constants.h" +#else +constexpr size_t IDX_SIZE = 1; +constexpr size_t MID_INT_SIZE = 192; +#endif // This implementation is based on the Ryu Printf algorithm by Ulf Adams: // Ulf Adams. 2019. Ryƫ revisited: printf floating point conversion. @@ -52,20 +107,41 @@ using MantissaInt = fputil::FPBits::UIntType; -constexpr size_t POW10_ADDITIONAL_BITS_CALC = 128; -constexpr size_t POW10_ADDITIONAL_BITS_TABLE = 120; - -constexpr size_t MID_INT_SIZE = 192; +// Larger numbers prefer a slightly larger constant than is used for the smaller +// numbers. +constexpr size_t CALC_SHIFT_CONST = 128; namespace internal { -// Returns floor(log_10(2^e)); requires 0 <= e <= 1650. -LIBC_INLINE constexpr uint32_t log10_pow2(const uint32_t e) { - // The first value this approximation fails for is 2^1651 which is just - // greater than 10^297. - LIBC_ASSERT(e >= 0 && e <= 1650 && +// Returns floor(log_10(2^e)); requires 0 <= e <= 42039. +LIBC_INLINE constexpr uint32_t log10_pow2(const uint64_t e) { + LIBC_ASSERT(e >= 0 && e <= 42039 && "Incorrect exponent to perform log10_pow2 approximation."); - return (e * 78913) >> 18; + // This approximation is based on the float value for log_10(2). It first + // gives an incorrect result for our purposes at 42039 (well beyond the 16383 + // maximum for long doubles). + + // To get these constants I first evaluated log_10(2) to get an approximation + // of 0.301029996. Next I passed that value through a string to double + // conversion to get an explicit mantissa of 0x13441350fbd738 and an exponent + // of -2 (which becomes -54 when we shift the mantissa to be a non-fractional + // number). Next I shifted the mantissa right 12 bits to create more space for + // the multiplication result, adding 12 to the exponent to compensate. To + // check that this approximation works for our purposes I used the following + // python code: + // for i in range(16384): + // if(len(str(2**i)) != (((i*0x13441350fbd)>>42)+1)): + // print(i) + // The reason we add 1 is because this evaluation truncates the result, giving + // us the floor, whereas counting the digits of the power of 2 gives us the + // ceiling. With a similar loop I checked the maximum valid value and found + // 42039. + return (e * 0x13441350fbdll) >> 42; +} + +// Same as above, but with different constants. +LIBC_INLINE constexpr uint32_t log2_pow5(const uint64_t e) { + return (e * 0x12934f0979bll) >> 39; } // Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any @@ -81,7 +157,7 @@ LIBC_INLINE constexpr uint32_t length_for_num(const uint32_t idx, const uint32_t mantissa_width) { //+8 to round up when dividing by 9 - return (ceil_log10_pow2(16 * idx) + ceil_log10_pow2(mantissa_width) + + return (ceil_log10_pow2(idx) + ceil_log10_pow2(mantissa_width) + (BLOCK_SIZE - 1)) / BLOCK_SIZE; // return (ceil_log10_pow2(16 * idx + mantissa_width) + 8) / 9; @@ -95,14 +171,14 @@ // TODO: Fix long doubles (needs bigger table or alternate algorithm.) // Currently the table values are generated, which is very slow. template -LIBC_INLINE constexpr cpp::UInt -get_table_positive(int exponent, size_t i, const size_t constant) { +LIBC_INLINE constexpr cpp::UInt get_table_positive(int exponent, + size_t i) { // INT_SIZE is the size of int that is used for the internal calculations of // this function. It should be large enough to hold 2^(exponent+constant), so // ~1000 for double and ~16000 for long double. Be warned that the time // complexity of exponentiation is O(n^2 * log_2(m)) where n is the number of // bits in the number being exponentiated and m is the exponent. - int shift_amount = exponent + constant - (9 * i); + const int shift_amount = exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * i); if (shift_amount < 0) { return 1; } @@ -110,8 +186,10 @@ // MOD_SIZE is one of the limiting factors for how big the constant argument // can get, since it needs to be small enough to fit in the result UInt, // otherwise we'll get truncation on return. - const cpp::UInt MOD_SIZE = - (cpp::UInt(1) << constant) * 1000000000; + constexpr cpp::UInt MOD_SIZE = + (cpp::UInt(1000000000) + << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); + constexpr uint64_t FIVE_EXP_NINE = 1953125; num = cpp::UInt(1) << (shift_amount); @@ -123,11 +201,64 @@ num = num + 1; if (num > MOD_SIZE) { - num = num % MOD_SIZE; + auto rem = + num.div_uint32_times_pow_2( + 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) + .value(); + num = rem; } return num; } +template +LIBC_INLINE cpp::UInt get_table_positive_df(int exponent, + size_t i) { + static_assert(INT_SIZE == 256, + "Only 256 is supported as an int size right now."); + // This version uses dyadic floats with 256 bit mantissas to perform the same + // calculation as above. Due to floating point imprecision it is only accurate + // for the first 50 digits, but it's much faster. Since even 128 bit long + // doubles are only accurate to ~35 digits, the 50 digits of accuracy are + // enough for these floats to be converted back and forth safely. This is + // ideal for avoiding the size of the long double table. + const int shift_amount = exponent + CALC_SHIFT_CONST - (9 * i); + if (shift_amount < 0) { + return 1; + } + fputil::DyadicFloat num(false, 0, 1); + constexpr cpp::UInt MOD_SIZE = + (cpp::UInt(1000000000) + << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); + + constexpr cpp::UInt FIVE_EXP_MINUS_NINE_MANT{ + {0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030, + 0x89705f4136b4a597}}; + + static const fputil::DyadicFloat FIVE_EXP_MINUS_NINE( + false, -276, FIVE_EXP_MINUS_NINE_MANT); + + if (i > 0) { + fputil::DyadicFloat fives = fputil::pow_n(FIVE_EXP_MINUS_NINE, i); + num = fives; + } + num = mul_pow_2(num, shift_amount); + + // Adding one is part of the formula. + cpp::UInt int_num = static_cast>(num) + 1; + if (int_num > MOD_SIZE) { + auto rem = + int_num + .div_uint32_times_pow_2( + 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) + .value(); + int_num = rem; + } + + cpp::UInt result = int_num; + + return result; +} + // The formula for the table when i is negative (or zero) is as follows: // floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0) // Since we know i is always negative, we just take it as unsigned and treat it @@ -136,40 +267,44 @@ // calculations. // The formula being used looks more like this: // floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0) -LIBC_INLINE cpp::UInt get_table_negative(int exponent, size_t i, - const size_t constant) { - constexpr size_t INT_SIZE = 1024; - int shift_amount = constant - exponent; +template +LIBC_INLINE cpp::UInt get_table_negative(int exponent, size_t i) { + int shift_amount = CALC_SHIFT_CONST - exponent; cpp::UInt num(1); - // const cpp::UInt MOD_SIZE = - // (cpp::UInt(1) << constant) * 1000000000; + constexpr cpp::UInt MOD_SIZE = + (cpp::UInt(1000000000) + << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); constexpr uint64_t TEN_EXP_NINE = 1000000000; constexpr uint64_t FIVE_EXP_NINE = 1953125; size_t ten_blocks = i; size_t five_blocks = 0; if (shift_amount < 0) { - int block_shifts = (-shift_amount) / 9; + int block_shifts = (-shift_amount) / BLOCK_SIZE; if (block_shifts < static_cast(ten_blocks)) { ten_blocks = ten_blocks - block_shifts; five_blocks = block_shifts; - shift_amount = shift_amount + (block_shifts * 9); + shift_amount = shift_amount + (block_shifts * BLOCK_SIZE); } else { ten_blocks = 0; five_blocks = i; - shift_amount = shift_amount + (i * 9); + shift_amount = shift_amount + (i * BLOCK_SIZE); } } if (five_blocks > 0) { cpp::UInt fives(FIVE_EXP_NINE); fives.pow_n(five_blocks); - num *= fives; + num = fives; } if (ten_blocks > 0) { cpp::UInt tens(TEN_EXP_NINE); tens.pow_n(ten_blocks); - num *= tens; + if (five_blocks <= 0) { + num = tens; + } else { + num *= tens; + } } if (shift_amount > 0) { @@ -177,12 +312,61 @@ } else { num = num >> (-shift_amount); } - // if (num > MOD_SIZE) { - // num = num % MOD_SIZE; - // } + if (num > MOD_SIZE) { + auto rem = + num.div_uint32_times_pow_2( + 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) + .value(); + num = rem; + } return num; } +template +LIBC_INLINE cpp::UInt get_table_negative_df(int exponent, + size_t i) { + static_assert(INT_SIZE == 256, + "Only 256 is supported as an int size right now."); + // This version uses dyadic floats with 256 bit mantissas to perform the same + // calculation as above. Due to floating point imprecision it is only accurate + // for the first 50 digits, but it's much faster. Since even 128 bit long + // doubles are only accurate to ~35 digits, the 50 digits of accuracy are + // enough for these floats to be converted back and forth safely. This is + // ideal for avoiding the size of the long double table. + + int shift_amount = CALC_SHIFT_CONST - exponent; + + fputil::DyadicFloat num(false, 0, 1); + constexpr cpp::UInt MOD_SIZE = + (cpp::UInt(1000000000) + << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); + + constexpr cpp::UInt TEN_EXP_NINE_MANT(1000000000); + + static const fputil::DyadicFloat TEN_EXP_NINE(false, 0, + TEN_EXP_NINE_MANT); + + if (i > 0) { + fputil::DyadicFloat tens = fputil::pow_n(TEN_EXP_NINE, i); + num = tens; + } + num = mul_pow_2(num, shift_amount); + + cpp::UInt int_num = static_cast>(num); + if (int_num > MOD_SIZE) { + auto rem = + int_num + .div_uint32_times_pow_2( + 1000000000, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) + .value(); + int_num = rem; + } + + cpp::UInt result = int_num; + + return result; +} + LIBC_INLINE uint32_t fast_uint_mod_1e9(const cpp::UInt &val) { // The formula for mult_const is: // 1 + floor((2^(bits in target integer size + log_2(divider))) / divider) @@ -205,7 +389,9 @@ wide_mant[0] = mantissa & (uint64_t(-1)); wide_mant[1] = mantissa >> 64; val = (val * wide_mant) >> shift_amount; - return fast_uint_mod_1e9(val); + + return val.div_uint32_times_pow_2(1000000000, 0).value()[0]; + // return fast_uint_mod_1e9(val); } } // namespace internal @@ -236,8 +422,6 @@ static constexpr int MANT_WIDTH = fputil::MantissaWidth::VALUE; static constexpr int EXP_BIAS = fputil::FPBits::EXPONENT_BIAS; - // constexpr void init_convert(); - public: LIBC_INLINE constexpr FloatToString(T init_float) : float_bits(init_float) { is_negative = float_bits.get_sign(); @@ -273,18 +457,49 @@ // find the coarse section of the POW10_SPLIT table that will be used to // calculate the 9 digit window, as well as some other related values. const uint32_t idx = - exponent < 0 ? 0 : static_cast(exponent + 15) / 16; + exponent < 0 + ? 0 + : static_cast(exponent + (IDX_SIZE - 1)) / IDX_SIZE; - uint32_t temp_shift_amount = - POW10_ADDITIONAL_BITS_TABLE + (16 * idx) - exponent; - const uint32_t shift_amount = temp_shift_amount; // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) - // exponent - int32_t i = block_index; cpp::UInt val; - val = POW10_SPLIT[POW10_OFFSET[idx] + i]; +#if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) + // ----------------------- DYADIC FLOAT CALC MODE ------------------------ + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + val = internal::get_table_positive_df<256>(IDX_SIZE * idx, block_index); +#elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC) + + // ---------------------------- INT CALC MODE ---------------------------- + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + + const uint64_t MAX_POW_2_SIZE = + exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index); + const uint64_t MAX_POW_5_SIZE = + internal::log2_pow5(BLOCK_SIZE * block_index); + const uint64_t MAX_INT_SIZE = + (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE; + + if (MAX_INT_SIZE < 1024) { + val = internal::get_table_positive<1024>(IDX_SIZE * idx, block_index); + } else if (MAX_INT_SIZE < 2048) { + val = internal::get_table_positive<2048>(IDX_SIZE * idx, block_index); + } else if (MAX_INT_SIZE < 4096) { + val = internal::get_table_positive<4096>(IDX_SIZE * idx, block_index); + } else if (MAX_INT_SIZE < 8192) { + val = internal::get_table_positive<8192>(IDX_SIZE * idx, block_index); + } else { + val = internal::get_table_positive<16384>(IDX_SIZE * idx, block_index); + } +#else + // ----------------------------- TABLE MODE ------------------------------ + const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; + + val = POW10_SPLIT[POW10_OFFSET[idx] + block_index]; +#endif + const uint32_t shift_amount = SHIFT_CONST + (IDX_SIZE * idx) - exponent; const uint32_t digits = internal::mul_shift_mod_1e9(mantissa, val, (int32_t)(shift_amount)); return digits; @@ -295,25 +510,58 @@ LIBC_INLINE constexpr BlockInt get_negative_block(int block_index) { if (exponent < 0) { - const int32_t idx = -exponent / 16; - uint32_t i = block_index; + const int32_t idx = -exponent / IDX_SIZE; + + cpp::UInt val; + +#if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) + // ----------------------- DYADIC FLOAT CALC MODE ------------------------ + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + val = + internal::get_table_negative_df<256>(idx * IDX_SIZE, block_index + 1); +#elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC) + // ---------------------------- INT CALC MODE ---------------------------- + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + const uint64_t TEN_BLOCKS = (block_index + 1) * BLOCK_SIZE; + const uint64_t MAX_INT_SIZE = internal::log2_pow5(TEN_BLOCKS); + + if (MAX_INT_SIZE < 1024) { + val = + internal::get_table_negative<1024>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 2048) { + val = + internal::get_table_negative<2048>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 4096) { + val = + internal::get_table_negative<4096>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 8192) { + val = + internal::get_table_negative<8192>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 16384) { + val = internal::get_table_negative<16384>(idx * IDX_SIZE, + block_index + 1); + } else { + val = internal::get_table_negative<32768>(idx * IDX_SIZE, + block_index + 1); + } +#else + // ----------------------------- TABLE MODE ------------------------------ // if the requested block is zero - if (i < MIN_BLOCK_2[idx]) { + const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; + if (block_index < MIN_BLOCK_2[idx]) { return 0; } - const int32_t shift_amount = - POW10_ADDITIONAL_BITS_TABLE + (-exponent - 16 * idx); - const uint32_t p = POW10_OFFSET_2[idx] + i - MIN_BLOCK_2[idx]; + const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx]; // If every digit after the requested block is zero. if (p >= POW10_OFFSET_2[idx + 1]) { return 0; } - cpp::UInt table_val = POW10_SPLIT_2[p]; - // cpp::UInt calculated_val = - // get_table_negative(idx * 16, i + 1, POW10_ADDITIONAL_BITS_CALC); + val = POW10_SPLIT_2[p]; +#endif + const int32_t shift_amount = SHIFT_CONST + (-exponent - IDX_SIZE * idx); uint32_t digits = - internal::mul_shift_mod_1e9(mantissa, table_val, shift_amount); + internal::mul_shift_mod_1e9(mantissa, val, shift_amount); return digits; } else { return 0; @@ -331,8 +579,10 @@ LIBC_INLINE constexpr size_t get_positive_blocks() { if (exponent >= -MANT_WIDTH) { const uint32_t idx = - exponent < 0 ? 0 : static_cast(exponent + 15) / 16; - const uint32_t len = internal::length_for_num(idx, MANT_WIDTH); + exponent < 0 + ? 0 + : static_cast(exponent + (IDX_SIZE - 1)) / IDX_SIZE; + const uint32_t len = internal::length_for_num(idx * IDX_SIZE, MANT_WIDTH); return len; } else { return 0; @@ -342,30 +592,53 @@ // This takes the index of a block after the decimal point (a negative block) // and return if it's sure that all of the digits after it are zero. LIBC_INLINE constexpr bool is_lowest_block(size_t block_index) { - const int32_t idx = -exponent / 16; +#ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE + return false; +#else + const int32_t idx = -exponent / IDX_SIZE; const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx]; // If the remaining digits are all 0, then this is the lowest block. return p >= POW10_OFFSET_2[idx + 1]; +#endif } LIBC_INLINE constexpr size_t zero_blocks_after_point() { - return MIN_BLOCK_2[-exponent / 16]; +#ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE + return 0; + // TODO (michaelrj): Find a good algorithm for this that doesn't use a + // table. +#else + return MIN_BLOCK_2[-exponent / IDX_SIZE]; +#endif } }; -// template <> constexpr void FloatToString::init_convert() { ; } +#ifndef LONG_DOUBLE_IS_DOUBLE +// --------------------------- LONG DOUBLE FUNCTIONS --------------------------- -// template <> constexpr void FloatToString::init_convert() { ; } - -// template <> constexpr void FloatToString::init_convert() { -// // TODO: More here. -// ; -// } +template <> +LIBC_INLINE constexpr size_t FloatToString::get_positive_blocks() { + if (exponent >= -MANT_WIDTH) { + const uint32_t idx = + exponent < 0 + ? 0 + : static_cast(exponent + (IDX_SIZE - 1)) / IDX_SIZE; + const uint32_t len = internal::length_for_num(idx * IDX_SIZE, MANT_WIDTH); + return len; + } else { + return 0; + } +} template <> LIBC_INLINE constexpr size_t FloatToString::zero_blocks_after_point() { +#ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE + return MIN_BLOCK_2[-exponent / IDX_SIZE]; +#else return 0; + // TODO (michaelrj): Find a good algorithm for this that doesn't use a table. +#endif } template <> @@ -377,29 +650,52 @@ LIBC_INLINE constexpr BlockInt FloatToString::get_positive_block(int block_index) { if (exponent >= -MANT_WIDTH) { - const uint32_t pos_exp = (exponent < 0 ? 0 : exponent); - uint32_t temp_shift_amount = - POW10_ADDITIONAL_BITS_CALC + pos_exp - exponent; - const uint32_t shift_amount = temp_shift_amount; - // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) - - // exponent + // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to + // find the coarse section of the POW10_SPLIT table that will be used to + // calculate the 9 digit window, as well as some other related values. + const uint32_t idx = + exponent < 0 + ? 0 + : static_cast(exponent + (IDX_SIZE - 1)) / IDX_SIZE; + const uint32_t pos_exp = idx * IDX_SIZE; + + // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) - exponent - int32_t i = block_index; cpp::UInt val; - if (exponent + POW10_ADDITIONAL_BITS_CALC < 1024) { - val = internal::get_table_positive<1024>(pos_exp, i, - POW10_ADDITIONAL_BITS_CALC); - } else if (exponent + POW10_ADDITIONAL_BITS_CALC < 4096) { - val = internal::get_table_positive<4096>(pos_exp, i, - POW10_ADDITIONAL_BITS_CALC); - } else if (exponent + POW10_ADDITIONAL_BITS_CALC < 8192) { - val = internal::get_table_positive<8192>(pos_exp, i, - POW10_ADDITIONAL_BITS_CALC); +#ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE + // ------------------------------ TABLE MODE ------------------------------- + const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; + val = POW10_SPLIT[POW10_OFFSET[idx] + block_index]; + +#elif defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) || \ + defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD) + // ------------------------ DYADIC FLOAT CALC MODE ------------------------- + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + val = internal::get_table_positive_df<256>(pos_exp, block_index); +#else + // ----------------------------- INT CALC MODE ----------------------------- + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + const uint64_t MAX_POW_2_SIZE = + exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index); + const uint64_t MAX_POW_5_SIZE = + internal::log2_pow5(BLOCK_SIZE * block_index); + const uint64_t MAX_INT_SIZE = + (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE; + + if (MAX_INT_SIZE < 1024) { + val = internal::get_table_positive<1024>(pos_exp, block_index); + } else if (MAX_INT_SIZE < 2048) { + val = internal::get_table_positive<2048>(pos_exp, block_index); + } else if (MAX_INT_SIZE < 4096) { + val = internal::get_table_positive<4096>(pos_exp, block_index); + } else if (MAX_INT_SIZE < 8192) { + val = internal::get_table_positive<8192>(pos_exp, block_index); } else { - val = internal::get_table_positive<16384>(pos_exp, i, - POW10_ADDITIONAL_BITS_CALC); + val = internal::get_table_positive<16384>(pos_exp, block_index); } +#endif + const uint32_t shift_amount = SHIFT_CONST + pos_exp - exponent; const BlockInt digits = internal::mul_shift_mod_1e9(mantissa, val, (int32_t)(shift_amount)); @@ -413,31 +709,62 @@ LIBC_INLINE constexpr BlockInt FloatToString::get_negative_block(int block_index) { if (exponent < 0) { - const int32_t idx = -exponent / 16; - uint32_t i = -1 - block_index; + const int32_t idx = -exponent / IDX_SIZE; + + cpp::UInt val; +#ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE + // ------------------------------ TABLE MODE ------------------------------- + const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; + // if the requested block is zero - if (i <= MIN_BLOCK_2[idx]) { + if (block_index <= MIN_BLOCK_2[idx]) { return 0; } - const int32_t shift_amount = - POW10_ADDITIONAL_BITS_CALC + (-exponent - 16 * idx); - const uint32_t p = POW10_OFFSET_2[idx] + i - MIN_BLOCK_2[idx]; + const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx]; // If every digit after the requested block is zero. if (p >= POW10_OFFSET_2[idx + 1]) { return 0; } - - // cpp::UInt table_val = POW10_SPLIT_2[p]; - cpp::UInt calculated_val = internal::get_table_negative( - idx * 16, i + 1, POW10_ADDITIONAL_BITS_CALC); - BlockInt digits = - internal::mul_shift_mod_1e9(mantissa, calculated_val, shift_amount); + val = POW10_SPLIT_2[p]; +#elif defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) || \ + defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD) + // ------------------------ DYADIC FLOAT CALC MODE ------------------------- + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + + val = internal::get_table_negative_df<256>(idx * IDX_SIZE, block_index + 1); +#else // table mode + // ----------------------------- INT CALC MODE ----------------------------- + const int32_t SHIFT_CONST = CALC_SHIFT_CONST; + + const uint64_t TEN_BLOCKS = (block_index + 1) * BLOCK_SIZE; + const uint64_t MAX_INT_SIZE = internal::log2_pow5(TEN_BLOCKS); + + if (MAX_INT_SIZE < 1024) { + val = internal::get_table_negative<1024>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 2048) { + val = internal::get_table_negative<2048>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 4096) { + val = internal::get_table_negative<4096>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 8192) { + val = internal::get_table_negative<8192>(idx * IDX_SIZE, block_index + 1); + } else if (MAX_INT_SIZE < 16384) { + val = + internal::get_table_negative<16384>(idx * IDX_SIZE, block_index + 1); + } else { + val = internal::get_table_negative<16384 + 8192>(idx * IDX_SIZE, + block_index + 1); + } +#endif + const int32_t shift_amount = SHIFT_CONST + (-exponent - IDX_SIZE * idx); + BlockInt digits = internal::mul_shift_mod_1e9(mantissa, val, shift_amount); return digits; } else { return 0; } } +#endif // LONG_DOUBLE_IS_DOUBLE + } // namespace __llvm_libc #endif // LLVM_LIBC_SRC_SUPPORT_FLOAT_TO_STRING_H diff --git a/libc/src/__support/ryu_constants.h b/libc/src/__support/ryu_constants.h --- a/libc/src/__support/ryu_constants.h +++ b/libc/src/__support/ryu_constants.h @@ -12,6 +12,11 @@ #include #include +constexpr size_t TABLE_SHIFT_CONST = 120; +constexpr size_t IDX_SIZE = 16; + +constexpr size_t MID_INT_SIZE = 192; + static const uint16_t POW10_OFFSET[64] = { 0, 2, 5, 8, 12, 16, 21, 26, 32, 39, 46, 54, 62, 71, 80, 90, 100, 111, 122, 134, 146, 159, 173, 187, 202, 217, diff --git a/libc/src/__support/ryu_long_double_constants.h b/libc/src/__support/ryu_long_double_constants.h new file mode 100644 --- /dev/null +++ b/libc/src/__support/ryu_long_double_constants.h @@ -0,0 +1,59 @@ +//===-- Ryu Constants for long double conversion ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_RYU_LONG_DOUBLE_CONSTANTS_H +#define LLVM_LIBC_SRC_SUPPORT_RYU_LONG_DOUBLE_CONSTANTS_H + +#include +#include + +constexpr size_t TABLE_SHIFT_CONST = 120; +constexpr size_t IDX_SIZE = 128; + +constexpr size_t MID_INT_SIZE = (256 + 64); + +static const uint32_t POW10_OFFSET[] = { + 0, 4, 13, 26, 43, 64, 90, 120, 154, 193, 236, + 283, 334, 390, 450, 514, 582, 655, 732, 813, 899, 989, + 1083, 1181, 1284, 1391, 1502, 1618, 1738, 1862, 1990, 2123, 2260, + 2401, 2547, 2697, 2851, 3009, 3172, 3339, 3510, 3686, 3866, 4050, + 4238, 4431, 4628, 4829, 5034, 5244, 5458, 5676, 5899, 6126, 6357, + 6592, 6832, 7076, 7324, 7577, 7834, 8095, 8360, 8630, 8904, 9182, + 9465, 9752, 10043, 10338, 10638, 10942, 11250, 11563, 11880, 12201, 12526, + 12856, 13190, 13528, 13871, 14218, 14569, 14924, 15284, 15648, 16016, 16388, + 16765, 17146, 17531, 17921, 18315, 18713, 19115, 19522, 19933, 20348, 20768, + 21192, 21620, 22052, 22489, 22930, 23375, 23825, 24279, 24737, 25199, 25666, + 26137, 26612, 27092, 27576, 28064, 28556, 29053, 29554, 30059, 30568, 31082, + 31600, 32122, 32649, 33180, 33715, 34254, 34798, 35346}; + +static const uint32_t POW10_OFFSET_2[130] = { + 0, 15, 44, 83, 131, 190, 259, 338, 426, 524, 633, + 751, 879, 1017, 1166, 1324, 1492, 1670, 1858, 2056, 2264, 2482, + 2709, 2947, 3195, 3453, 3720, 3997, 4285, 4582, 4889, 5206, 5534, + 5871, 6218, 6575, 6941, 7318, 7705, 8102, 8508, 8925, 9352, 9788, + 10234, 10690, 11157, 11633, 12119, 12615, 13122, 13638, 14164, 14700, 15245, + 15801, 16367, 16943, 17528, 18124, 18730, 19345, 19970, 20605, 21251, 21906, + 22571, 23246, 23931, 24626, 25331, 26046, 26770, 27505, 28250, 29004, 29768, + 30543, 31328, 32122, 32926, 33740, 34565, 35399, 36243, 37097, 37961, 38835, + 39719, 40613, 41516, 42430, 43354, 44287, 45230, 46184, 47148, 48121, 49104, + 50097, 51100, 52113, 53136, 54169, 55212, 56265, 57328, 58400, 59482, 60575, + 61678, 62790, 63912, 65045, 66188, 67340, 68502, 69674, 70856, 72048, 73250, + 74462, 75684, 76916, 78158, 79409, 80670, 81942, 83224, 84515}; + +static const uint16_t MIN_BLOCK_2[130] = { + 0, 0, 4, 9, 13, 17, 21, 26, 30, 34, 39, 43, 47, 51, 56, + 60, 64, 68, 73, 77, 81, 86, 90, 94, 98, 103, 107, 111, 116, 120, + 124, 128, 133, 137, 141, 146, 150, 154, 158, 163, 167, 171, 176, 180, 184, + 188, 193, 197, 201, 205, 210, 214, 218, 223, 227, 231, 235, 240, 244, 248, + 253, 257, 261, 265, 270, 274, 278, 283, 287, 291, 295, 300, 304, 308, 313, + 317, 321, 325, 330, 334, 338, 342, 347, 351, 355, 360, 364, 368, 372, 377, + 381, 385, 390, 394, 398, 402, 407, 411, 415, 420, 424, 428, 432, 437, 441, + 445, 450, 454, 458, 462, 467, 471, 475, 479, 484, 488, 492, 497, 501, 505, + 509, 514, 518, 522, 527, 531, 535, 539, 544, 0}; + +#endif // LLVM_LIBC_SRC_SUPPORT_RYU_LONG_DOUBLE_CONSTANTS_H diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -99,6 +99,8 @@ libc.src.__support.uint128 libc.src.__support.integer_to_string libc.src.__support.float_to_string + # COMPILE_OPTIONS + # -DLIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE ) diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp --- a/libc/src/stdio/printf_core/converter.cpp +++ b/libc/src/stdio/printf_core/converter.cpp @@ -28,8 +28,8 @@ if (!to_conv.has_conv) return writer->write(to_conv.raw_string); -#ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT - // TODO(michaelrj): Undo this once decimal long double support is done. +#if !defined(LIBC_COPT_PRINTF_DISABLE_FLOAT) && \ + defined(LIBC_COPT_PRINTF_HEX_LONG_DOUBLE) if (to_conv.length_modifier == LengthModifier::L) { switch (to_conv.conv_name) { case 'f': diff --git a/libc/src/stdio/printf_core/printf_config.h b/libc/src/stdio/printf_core/printf_config.h --- a/libc/src/stdio/printf_core/printf_config.h +++ b/libc/src/stdio/printf_core/printf_config.h @@ -29,6 +29,14 @@ #define LIBC_COPT_PRINTF_INDEX_ARR_LEN 128 #endif +// TODO(michaelrj): Provide a proper interface for these options. +// LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE +// LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT +// LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT_LD +// LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC +// LIBC_COPT_FLOAT_TO_STR_NO_TABLE +// LIBC_COPT_PRINTF_HEX_LONG_DOUBLE + // TODO(michaelrj): Move the other printf configuration options into this file. #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PRINTF_CONFIG_H diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -930,16 +930,19 @@ // TODO: Fix long doubles (needs bigger table or alternate algorithm.) // Currently the table values are generated, which is very slow. - /* + + written = __llvm_libc::sprintf(buff, "%Lf", 1.0L); + ASSERT_STREQ_LEN(written, buff, "1.000000"); + written = __llvm_libc::sprintf(buff, "%Lf", 1e100L); ASSERT_STREQ_LEN(written, buff, "99999999999999999996693535322073426194986990198284960792713" "91541752018669482644324418977840117055488.000000"); - written = __llvm_libc::sprintf(buff, "%Lf", 1.0L); - ASSERT_STREQ_LEN(written, buff, "1.000000"); - char big_buff[10000]; + + // written = __llvm_libc::sprintf(big_buff, "%Lf", 0x1p16383L); + written = __llvm_libc::sprintf(big_buff, "%Lf", 1e1000L); ASSERT_STREQ_LEN( written, big_buff, @@ -1031,7 +1034,122 @@ "739074789794941408428328217107759915202650066155868439585510978709442590" "231934194956788626761834746430104077432547436359522462253411168467463134" "24896.000000"); -*/ + + + written = __llvm_libc::sprintf(big_buff, "%.10Lf", 1e-10L); + ASSERT_STREQ_LEN( + written, big_buff, "0.0000000001"); + + written = __llvm_libc::sprintf(big_buff, "%.7500Lf", 1e-4900L); + ASSERT_STREQ_LEN( + written, big_buff, + "0." + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000000000000000" + "000099999999999999999996962764452956071352139203248614920751610856665084" + "549214352477698417183862158583009348897567779527408501588132175167211539" + "462139941448204886585901454195352527238724272760638086779284030512649793" + "039219351187928723378036480041948464946018272171365770411701020666925613" + "422460317465324758217878522666789603627480490870456508256359180089236338" + "765625231186929290294207420828927406735690318849109129700396907705735097" + "663944722727287361650042373203763784830198232253311807069225650324196304" + "532045014970637489181357566354288111205943347410488298480279857453705249" + "232862728556860184412369114663536200895729846877559808001004454634804626" + "541455540260282018142615835686583304903486353937549394736905011798466731" + "536563240053860118551127061960208467764243724656897127545613968909523389" + "577188368809623987105800147797280462974804046545425080530020901531407223" + "191237123282274818236437397994019915368657474589800678444589412286037789" + "891525464936023205313685584525510094270344601331453730179416773626565262" + "480345858564672442896904520146956686863172737711483866766404977719744767" + "834324844875237277613991088218774564658513875732403456058414595576806383" + "115554713240005982141397577420073082470139244845624915873825746771661332" + "098677966580506186966978746832443976821987300902957597498388211921362869" + "017846215557612829071692275292036211064515305528052919611691470945774714" + "135516559501572279732350629089770249554808690411603894492333360300589658" + "470898965370892774715815089075170720164713889237058574941489766701880158" + "060081295483989540170337129032188818293132770882381428397119039835946745" + "549356649433406617266370644136291924838857814675939156677910783740103207" + "523299367093130816446415259371931925208362367989095199399211644084543790" + "110432339056231037520216864358899218874658268610955002763260912337688947" + "822453100821038299301092582962825965939081817836419126254832772002214908" + "085575905761843610944187009818156363893015929300295112598059949496854566" + "638748010633726861510500653821408135845840123073754133549077708843800674" + "328440913743105608636458354618912183716456158809545183074062249922212944" + "249667793845728355381309084891765979111348980470647082269921872595470473" + "719354467594516320911964549508538492057120740224559944452120552719041944" + "961475548547884309626382512432626380881023756568143060204097921571153170" + "723817845809196253498326358439807445210362177680590181657555380795450462" + "223805222580359379367452693270553602179122419370586308101820559214330382" + "570449525088342437216896462077260223998756027453411520977536701491759878" + "422771447006016890777855573925295187921971811871399320142563330377888532" + "179817332113"); + /* written = __llvm_libc::sprintf(buff, "%La", 0.1L); #if defined(SPECIAL_X86_LONG_DOUBLE) diff --git a/libc/utils/mathtools/ryu_tablegen.py b/libc/utils/mathtools/ryu_tablegen.py new file mode 100644 --- /dev/null +++ b/libc/utils/mathtools/ryu_tablegen.py @@ -0,0 +1,204 @@ +""" +//===-- Table Generator for Ryu Printf ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +This file is used to generate the tables of values in +src/__support/ryu_constants.h and ryu_long_double constants.h. To use it, set +the constants at the top of the file to the values you want to use for the Ryu +algorithm, then run this file. It will output the appropriate tables to stdout, +so it's recommended to pipe stdout to a file. The following is a brief +explenation of each of the constants. + +BLOCK_SIZE: + Default: 9 + The number of digits that will be calculated together in a block. + Don't touch this unless you really know what you're doing. + +CONSTANT: + Default: 120 + Also known as c_0 and c_1 in the Ryu Printf paper and SHIFT_CONST in + float_to_string.h. + The table value is shifted left by this amount, and the final value is + shifted right by this amount. It effectively makes the table value a fixed + point number with the decimal point at the bit that is CONSTANT bits from + the right. + Higher values increase accuracy, but also require higher MID_INT_WIDTH + values to fit the result. + +IDX_SIZE: + Default: 16 + By increasing the MOD_SIZE slightly we can significantly decrease the number + of table entries we need. + We can divide the number of table entries by IDX_SIZE, and multiply MOD_SIZE + by 2^IDX_SIZE, and the math still works out. + This optimization isn't mentioned in the original Ryu Printf paper but it + saves a lot of space. + +MID_INT_WIDTH: + Default: 192 + This is the size of integer that the tables use. It's called mid int because + it's the integer used in the middle of the calculation. There are large ints + used to calculate the mid int table values, then those are used to calculate + the small int final values. + This must be divisible by 64 since each table entry is an array of 64 bit + integers. + If this is too small, then the results will get cut off. It should be at + least CONSTANT + IDX_SIZE + log_2(10^9) to be able to fit the table values. + +MANT_WIDTH: + The width of the widest float mantissa that this table will work for. + This has a small effect on table size. + +EXP_WIDTH: + The width of the widest float exponent that this table will work for. + This has a large effect on table size. + Specifically, table size is proportional to the square of this number. +""" + +BLOCK_SIZE = 9 + + +# Values for double +# CONSTANT = 120 +# IDX_SIZE = 16 +# MANT_WIDTH = 52 +# EXP_WIDTH = 11 +# MID_INT_SIZE = 192 + +# Values for 128 bit float +CONSTANT = 120 +IDX_SIZE = 128 +MANT_WIDTH = 112 +EXP_WIDTH = 15 +MID_INT_SIZE = 256 + 64 + +MAX_EXP = 2 ** (EXP_WIDTH - 1) +POSITIVE_ARR_SIZE = MAX_EXP // IDX_SIZE +NEGATIVE_ARR_SIZE = (MAX_EXP // IDX_SIZE) + ((MANT_WIDTH + (IDX_SIZE - 1)) // IDX_SIZE) +MOD_SIZE = (10**BLOCK_SIZE) << (CONSTANT + (IDX_SIZE if IDX_SIZE > 1 else 0)) + + +# floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1) +def get_table_positive(exponent, i): + pow_of_two = 1 << (exponent + CONSTANT - (BLOCK_SIZE * i)) + pow_of_five = 5 ** (BLOCK_SIZE * i) + result = (pow_of_two // pow_of_five) + 1 + return result % MOD_SIZE + + +# floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0) +def get_table_negative(exponent, i): + result = 1 + pow_of_ten = 10 ** (BLOCK_SIZE * i) + shift_amount = CONSTANT - exponent + if shift_amount < 0: + result = pow_of_ten >> (-shift_amount) + else: + result = pow_of_ten << (shift_amount) + return result % MOD_SIZE + + +# Returns floor(log_10(2^e)); requires 0 <= e <= 42039. +def ceil_log10_pow2(e): + return ((e * 0x13441350FBD) >> 42) + 1 + + +def length_for_num(idx, index_size=IDX_SIZE): + return ( + ceil_log10_pow2(idx * index_size) + ceil_log10_pow2(MANT_WIDTH) + BLOCK_SIZE - 1 + ) // BLOCK_SIZE + + +def get_64bit_window(num, index): + return (num >> (index * 64)) % (2**64) + + +def mid_int_to_str(num): + outstr = " {" + outstr += str(get_64bit_window(num, 0)) + "u" + for i in range(1, MID_INT_SIZE // 64): + outstr += ", " + str(get_64bit_window(num, i)) + "u" + outstr += "}," + return outstr + + +def print_positive_table_for_idx(idx): + positive_blocks = length_for_num(idx) + for i in range(positive_blocks): + table_val = get_table_positive(idx * IDX_SIZE, i) + # print(hex(table_val)) + print(mid_int_to_str(table_val)) + return positive_blocks + + +def print_negative_table_for_idx(idx): + i = 0 + min_block = -1 + table_val = 0 + MIN_USEFUL_VAL = 2 ** (CONSTANT - (MANT_WIDTH + 2)) + # Iterate through the zero blocks + while table_val < MIN_USEFUL_VAL: + i += 1 + table_val = get_table_negative((idx) * IDX_SIZE, i) + else: + i -= 1 + + min_block = i + + # Iterate until another zero block is found + while table_val >= MIN_USEFUL_VAL: + table_val = get_table_negative((idx) * IDX_SIZE, i + 1) + if table_val >= MIN_USEFUL_VAL: + print(mid_int_to_str(table_val)) + i += 1 + return i - min_block, min_block + + +positive_size_arr = [0] * (POSITIVE_ARR_SIZE + 1) + +negative_size_arr = [0] * (NEGATIVE_ARR_SIZE + 1) +min_block_arr = [0] * (NEGATIVE_ARR_SIZE + 1) +acc = 0 + +if MOD_SIZE > (2**MID_INT_SIZE): + print( + "Mod size is too big for current MID_INT_SIZE by a factor of", + MOD_SIZE // (2**MID_INT_SIZE), + ) +else: + print("static const uint64_t POW10_SPLIT[][" + str(MID_INT_SIZE // 64) + "] = {") + for idx in range(0, POSITIVE_ARR_SIZE): + num_size = print_positive_table_for_idx(idx) + acc += num_size + positive_size_arr[idx + 1] = acc + print("};") + + print( + "static const uint32_t POW10_OFFSET_2[" + str(len(positive_size_arr)) + "] = {", + str(positive_size_arr)[1:-2], + "};", + ) + + print("static const uint64_t POW10_SPLIT_2[][" + str(MID_INT_SIZE // 64) + "] = {") + for idx in range(0, NEGATIVE_ARR_SIZE): + num_size, min_block = print_negative_table_for_idx(idx) + acc += num_size + negative_size_arr[idx + 1] = acc + min_block_arr[idx] = min_block + print("};") + print( + "static const uint32_t POW10_OFFSET_2[" + str(len(negative_size_arr)) + "] = {", + str(negative_size_arr)[1:-2], + "};", + ) + print( + "static const uint16_t MIN_BLOCK_2[" + str(len(min_block_arr)) + "] = {", + str(min_block_arr)[1:-2], + "};", + ) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -310,10 +310,14 @@ hdrs = [ "src/__support/float_to_string.h", "src/__support/ryu_constants.h", + "src/__support/ryu_long_double_constants.h", ], + # This is temporarily commented out since the table is too large to land in the same patch as the rest of the changes. + # defines = ["LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE"], deps = [ ":__support_common", ":__support_cpp_type_traits", + ":__support_fputil_dyadic_float", ":__support_fputil_fp_bits", ":__support_libc_assert", ":__support_uint", @@ -759,6 +763,7 @@ ":__support_common", ":__support_fputil_multiply_add", ":__support_number_pair", + ":libc_root", ], ) @@ -772,6 +777,7 @@ ":__support_fputil_multiply_add", ":__support_macros_optimization", ":__support_uint", + ":libc_root", ], )