diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -109,6 +109,7 @@ .ctype_utils .high_precision_decimal .str_to_integer + .str_to_num_result .uint128 libc.include.errno libc.src.__support.CPP.limits diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h --- a/libc/src/__support/high_precision_decimal.h +++ b/libc/src/__support/high_precision_decimal.h @@ -21,6 +21,10 @@ char const *power_of_five; }; +// This is used in both this file and in the main str_to_float.h. +// TODO: Figure out where to put this. +enum class RoundDirection { Up, Down, Nearest }; + // This is based on the HPD data structure described as part of the Simple // Decimal Conversion algorithm by Nigel Tao, described at this link: // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html @@ -111,12 +115,23 @@ uint8_t digits[MAX_NUM_DIGITS]; private: - bool should_round_up(int32_t roundToDigit) { + bool should_round_up(int32_t roundToDigit, RoundDirection round) { if (roundToDigit < 0 || static_cast(roundToDigit) >= this->num_digits) { return false; } + // The above condition handles all cases where all of the trailing digits + // are zero. In that case, if the rounding mode is up, then this number + // should be rounded up. Similarly, if the rounding mode is down, then it + // should always round down. + if (round == RoundDirection::Up) { + return true; + } else if (round == RoundDirection::Down) { + return false; + } + // Else round to nearest. + // If we're right in the middle and there are no extra digits if (this->digits[roundToDigit] == 5 && static_cast(roundToDigit + 1) == this->num_digits) { @@ -357,7 +372,8 @@ // Round the number represented to the closest value of unsigned int type T. // This is done ignoring overflow. 
- template T round_to_integer_type() { + template + T round_to_integer_type(RoundDirection round = RoundDirection::Nearest) { T result = 0; uint32_t cur_digit = 0; @@ -372,7 +388,7 @@ result *= 10; ++cur_digit; } - if (this->should_round_up(this->decimal_point)) { + if (this->should_round_up(this->decimal_point, round)) { ++result; } return result; diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -10,6 +10,7 @@ #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H #include "src/__support/CPP/limits.h" +#include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/UInt128.h" #include "src/__support/builtin_wrappers.h" @@ -18,11 +19,28 @@ #include "src/__support/detailed_powers_of_ten.h" #include "src/__support/high_precision_decimal.h" #include "src/__support/str_to_integer.h" +#include "src/__support/str_to_num_result.h" #include namespace __llvm_libc { namespace internal { +template struct FloatPair { + typename fputil::FPBits::UIntType mantissa; + int32_t exponent; +}; + +template struct FloatConvertReturn { + typename fputil::FPBits::UIntType mantissa = 0; + int32_t exponent = 0; + int error = 0; +}; + +template struct FloatParseReturn { + FloatConvertReturn convert_result; + size_t str_len = 0; +}; + template LIBC_INLINE uint32_t leading_zeroes(T inputNumber) { constexpr uint32_t BITS_IN_T = sizeof(T) * 8; if (inputNumber == 0) { @@ -91,23 +109,30 @@ // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25) // for some optimizations as well as handling 32 bit floats. 
template -LIBC_INLINE bool -eisel_lemire(typename fputil::FPBits::UIntType mantissa, int32_t exp10, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn +eisel_lemire(FloatPair init_num, + RoundDirection round = RoundDirection::Nearest) { using BitsType = typename fputil::FPBits::UIntType; + + BitsType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + + const static FloatConvertReturn fail_output = {0, 0, -1}; + + FloatConvertReturn output; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a // double, so we skip straight to the fallback. - return false; + return fail_output; } // Exp10 Range if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { - return false; + return fail_output; } // Normalization @@ -146,7 +171,7 @@ if ((high64(second_approx) & halfway_constant) == halfway_constant && low64(second_approx) + 1 == 0 && low64(low_bits) + mantissa < mantissa) { - return false; + return fail_output; } final_approx = second_approx; } else { @@ -162,15 +187,28 @@ (fputil::FloatProperties::MANTISSA_WIDTH + 3))); exp2 -= static_cast(1 ^ msb); // same as !msb - // Half-way ambiguity - if (low64(final_approx) == 0 && - (high64(final_approx) & halfway_constant) == 0 && - (final_mantissa & 3) == 1) { - return false; + if (round == RoundDirection::Nearest) { + // Half-way ambiguity + if (low64(final_approx) == 0 && + (high64(final_approx) & halfway_constant) == 0 && + (final_mantissa & 3) == 1) { + return fail_output; + } + + // Round to even. + final_mantissa += final_mantissa & 1; + + } else if (round == RoundDirection::Up) { + // If any of the bits being rounded away are non-zero, then round up. + if (low64(final_approx) > 0 || + (high64(final_approx) & halfway_constant) > 0) { + // Add two since the last current lowest bit is about to be shifted away. 
+ final_mantissa += 2; + } } + // else round down, which has no effect. // From 54 to 53 bits for doubles and 25 to 24 bits for floats - final_mantissa += final_mantissa & 1; final_mantissa >>= 1; if ((final_mantissa >> (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { @@ -181,23 +219,30 @@ // The if block is equivalent to (but has fewer branches than): // if exp2 <= 0 || exp2 >= 0x7FF { etc } if (exp2 - 1 >= (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { - return false; + return fail_output; } - *outputMantissa = final_mantissa; - *outputExp2 = exp2; - return true; + output.mantissa = final_mantissa; + output.exponent = exp2; + return output; } #if !defined(LONG_DOUBLE_IS_DOUBLE) template <> -LIBC_INLINE bool eisel_lemire( - typename fputil::FPBits::UIntType mantissa, int32_t exp10, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn +eisel_lemire(FloatPair init_num, + RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; + + BitsType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; + const static FloatConvertReturn fail_output = {0, 0, -1}; + + FloatConvertReturn output; + // Exp10 Range // This doesn't reach very far into the range for long doubles, since it's // sized for doubles and their 11 exponent bits, and not for long doubles and @@ -210,7 +255,7 @@ // out to the full long double range. 
if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { - return false; + return fail_output; } // Normalization @@ -258,7 +303,7 @@ if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && final_approx_lower + mantissa < mantissa) { - return false; + return fail_output; } // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats @@ -269,15 +314,27 @@ (fputil::FloatProperties::MANTISSA_WIDTH + 3)); exp2 -= static_cast(1 ^ msb); // same as !msb - // Half-way ambiguity - if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && - (final_mantissa & 3) == 1) { - return false; + if (round == RoundDirection::Nearest) { + // Half-way ambiguity + if (final_approx_lower == 0 && + (final_approx_upper & HALFWAY_CONSTANT) == 0 && + (final_mantissa & 3) == 1) { + return fail_output; + } + // Round to even. + final_mantissa += final_mantissa & 1; + + } else if (round == RoundDirection::Up) { + // If any of the bits being rounded away are non-zero, then round up. + if (final_approx_lower > 0 || (final_approx_upper & HALFWAY_CONSTANT) > 0) { + // Add two since the last current lowest bit is about to be shifted away. + final_mantissa += 2; + } } + // else round down, which has no effect. 
// From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit // floats - final_mantissa += final_mantissa & 1; final_mantissa >>= 1; if ((final_mantissa >> (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { @@ -289,12 +346,12 @@ // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } if (exp2 - 1 >= (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { - return false; + return fail_output; } - *outputMantissa = final_mantissa; - *outputExp2 = exp2; - return true; + output.mantissa = final_mantissa; + output.exponent = exp2; + return output; } #endif @@ -312,18 +369,19 @@ // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html template -LIBC_INLINE void +LIBC_INLINE FloatConvertReturn simple_decimal_conversion(const char *__restrict numStart, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { + RoundDirection round = RoundDirection::Nearest) { int32_t exp2 = 0; HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart); + FloatConvertReturn output; + if (hpd.get_num_digits() == 0) { - *outputMantissa = 0; - *outputExp2 = 0; - return; + output.mantissa = 0; + output.exponent = 0; + return output; } // If the exponent is too large and can't be represented in this size of @@ -331,20 +389,20 @@ if (hpd.get_decimal_point() > 0 && exp10_to_exp2(hpd.get_decimal_point() - 1) > static_cast(fputil::FloatProperties::EXPONENT_BIAS)) { - *outputMantissa = 0; - *outputExp2 = fputil::FPBits::MAX_EXPONENT; - errno = ERANGE; - return; + output.mantissa = 0; + output.exponent = fputil::FPBits::MAX_EXPONENT; + output.error = ERANGE; + return output; } // If the exponent is too small even for a subnormal, return 0. 
if (hpd.get_decimal_point() < 0 && exp10_to_exp2(-hpd.get_decimal_point()) > static_cast(fputil::FloatProperties::EXPONENT_BIAS + fputil::FloatProperties::MANTISSA_WIDTH)) { - *outputMantissa = 0; - *outputExp2 = 0; - errno = ERANGE; - return; + output.mantissa = 0; + output.exponent = 0; + output.error = ERANGE; + return output; } // Right shift until the number is smaller than 1. @@ -384,10 +442,10 @@ // Handle the exponent being too large (and return inf). if (exp2 >= fputil::FPBits::MAX_EXPONENT) { - *outputMantissa = 0; - *outputExp2 = fputil::FPBits::MAX_EXPONENT; - errno = ERANGE; - return; + output.mantissa = 0; + output.exponent = fputil::FPBits::MAX_EXPONENT; + output.error = ERANGE; + return output; } // Shift left to fill the mantissa @@ -406,7 +464,7 @@ // between 1 and 2. hpd.shift(-1); final_mantissa = - hpd.round_to_integer_type::UIntType>(); + hpd.round_to_integer_type::UIntType>(round); // Check if by shifting right we've caused this to round to a normal number. if ((final_mantissa >> fputil::FloatProperties::MANTISSA_WIDTH) != 0) { @@ -424,16 +482,17 @@ // INF. If this is the case, then finalMantissa and exp2 are already the // correct values for an INF result. if (exp2 >= fputil::FPBits::MAX_EXPONENT) { - errno = ERANGE; // NOLINT + output.error = ERANGE; } } if (exp2 == 0) { - errno = ERANGE; + output.error = ERANGE; } - *outputMantissa = final_mantissa; - *outputExp2 = exp2; + output.mantissa = final_mantissa; + output.exponent = exp2; + return output; } // This class is used for templating the constants for Clinger's Fast Path, @@ -510,12 +569,21 @@ // exponents, but handles them quickly. This is an implementation of Clinger's // Fast Path, as described above. 
template -LIBC_INLINE bool -clinger_fast_path(typename fputil::FPBits::UIntType mantissa, int32_t exp10, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn +clinger_fast_path(FloatPair init_num, + RoundDirection round = RoundDirection::Nearest) { + + typename fputil::FPBits::UIntType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + + // Since CFP can never set errno, the error field is used to represent if the + // algorithm succeeded. A non-zero value represents a failure state. + const static FloatConvertReturn fail_output = {0, 0, -1}; + + FloatConvertReturn output; + if (mantissa >> fputil::FloatProperties::MANTISSA_WIDTH > 0) { - return false; + return fail_output; } fputil::FPBits result; @@ -527,7 +595,7 @@ if (exp10 > 0) { if (exp10 > ClingerConsts::EXACT_POWERS_OF_TEN + ClingerConsts::DIGITS_IN_MANTISSA) { - return false; + return fail_output; } if (exp10 > ClingerConsts::EXACT_POWERS_OF_TEN) { float_mantissa = float_mantissa * @@ -536,20 +604,52 @@ exp10 = ClingerConsts::EXACT_POWERS_OF_TEN; } if (float_mantissa > ClingerConsts::MAX_EXACT_INT) { - return false; + return fail_output; } result = fputil::FPBits(float_mantissa * ClingerConsts::POWERS_OF_TEN_ARRAY[exp10]); } else if (exp10 < 0) { if (-exp10 > ClingerConsts::EXACT_POWERS_OF_TEN) { - return false; + return fail_output; } result = fputil::FPBits(float_mantissa / ClingerConsts::POWERS_OF_TEN_ARRAY[-exp10]); } - *outputMantissa = result.get_mantissa(); - *outputExp2 = result.get_unbiased_exponent(); - return true; + + // If the rounding mode is not nearest, then the sign of the number may affect + // the result. To make sure the rounding mode is respected properly, the + // calculation is redone with a negative result, and the rounding mode is used + // to select the correct result. 
+ if (round != RoundDirection::Nearest) { + // Redo the same op negated (exp10 < 0 divides); may break under fast math. + fputil::FPBits negative_result( + exp10 < 0 ? (-float_mantissa) / ClingerConsts::POWERS_OF_TEN_ARRAY[-exp10] + : (-float_mantissa) * ClingerConsts::POWERS_OF_TEN_ARRAY[exp10]); + + // If the results are equal, then we don't need to use the rounding mode. + if (T(result) != -T(negative_result)) { + fputil::FPBits lower_result; + fputil::FPBits higher_result; + + if (T(result) < -T(negative_result)) { + lower_result = result; + higher_result = negative_result; + } else { + lower_result = negative_result; + higher_result = result; + } + + if (round == RoundDirection::Up) { + result = higher_result; + } else { + result = lower_result; + } + } + } + output.mantissa = result.get_mantissa(); + output.exponent = result.get_unbiased_exponent(); + output.error = 0; + return output; } // The upper bound is the highest base-10 exponent that could possibly give a @@ -593,63 +693,75 @@ // accuracy. The resulting mantissa and exponent are placed in outputMantissa // and outputExp2. template -LIBC_INLINE void -decimal_exp_to_float(typename fputil::FPBits::UIntType mantissa, - int32_t exp10, const char *__restrict numStart, - bool truncated, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn +decimal_exp_to_float(FloatPair init_num, const char *__restrict numStart, + bool truncated, RoundDirection round) { + + typename fputil::FPBits::UIntType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + + FloatConvertReturn output; + // If the exponent is too large and can't be represented in this size of // float, return inf. These bounds are relatively loose, but are mostly // serving as a first pass. Some close numbers getting through is okay. 
if (exp10 > get_upper_bound()) { - *outputMantissa = 0; - *outputExp2 = fputil::FPBits::MAX_EXPONENT; - errno = ERANGE; - return; + output.mantissa = 0; + output.exponent = fputil::FPBits::MAX_EXPONENT; + output.error = ERANGE; + return output; } // If the exponent is too small even for a subnormal, return 0. if (exp10 < get_lower_bound()) { - *outputMantissa = 0; - *outputExp2 = 0; - errno = ERANGE; - return; + output.mantissa = 0; + output.exponent = 0; + output.error = ERANGE; + return output; } + // Clinger's Fast Path and Eisel-Lemire can't set errno, but they can fail. + // For this reason the "error" field in their return values is used to + // represent whether they've failed as opposed to the errno value. Any + // non-zero value represents a failure. + #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH if (!truncated) { - if (clinger_fast_path(mantissa, exp10, outputMantissa, outputExp2)) { - return; + output = clinger_fast_path(init_num, round); + // If the algorithm succeeded the error will be 0, else it will be a + // non-zero number. + if (output.error == 0) { + return output; } } #endif // LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE // Try Eisel-Lemire - if (eisel_lemire(mantissa, exp10, outputMantissa, outputExp2)) { + output = eisel_lemire(init_num, round); + if (output.error == 0) { if (!truncated) { - return; + return output; } // If the mantissa is truncated, then the result may be off by the LSB, so // check if rounding the mantissa up changes the result. If not, then it's // safe, else use the fallback. 
- typename fputil::FPBits::UIntType first_mantissa = *outputMantissa; - uint32_t first_exp2 = *outputExp2; - if (eisel_lemire(mantissa + 1, exp10, outputMantissa, outputExp2)) { - if (*outputMantissa == first_mantissa && *outputExp2 == first_exp2) { - return; + auto secound_output = eisel_lemire({mantissa + 1, exp10}, round); + if (secound_output.error == 0) { + if (output.mantissa == secound_output.mantissa && + output.exponent == secound_output.exponent) { + return output; } } } #endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION - simple_decimal_conversion(numStart, outputMantissa, outputExp2); + output = simple_decimal_conversion(numStart, round); #else #warning "Simple decimal conversion is disabled, result may not be correct." #endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION - return; + return output; } // Takes a mantissa and base 2 exponent and converts it into its closest @@ -657,13 +769,16 @@ // form, this is mostly just shifting and rounding. This is used for hexadecimal // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent. template -LIBC_INLINE void -binary_exp_to_float(typename fputil::FPBits::UIntType mantissa, int32_t exp2, - bool truncated, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn binary_exp_to_float(FloatPair init_num, + bool truncated, + RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; + BitsType mantissa = init_num.mantissa; + int32_t exp2 = init_num.exponent; + + FloatConvertReturn output; + // This is the number of leading zeroes a properly normalized float of type T // should have. constexpr int32_t NUMBITS = sizeof(BitsType) * 8; @@ -684,10 +799,10 @@ // Handle numbers that're too large and get squashed to inf if (biased_exponent >= INF_EXP) { // This indicates an overflow, so we make the result INF and set errno. 
- *outputExp2 = (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 1; - *outputMantissa = 0; - errno = ERANGE; - return; + output.exponent = (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 1; + output.mantissa = 0; + output.error = ERANGE; + return output; } uint32_t amount_to_shift_right = @@ -700,10 +815,10 @@ if (amount_to_shift_right > NUMBITS) { // Return 0 if the exponent is too small. - *outputMantissa = 0; - *outputExp2 = 0; - errno = ERANGE; - return; + output.exponent = 0; + output.mantissa = 0; + output.error = ERANGE; + return output; } } @@ -720,9 +835,22 @@ mantissa = 0; } bool least_significant_bit = mantissa & BitsType(1); - // Perform rounding-to-nearest, tie-to-even. - if (round_bit && (least_significant_bit || sticky_bit)) { - ++mantissa; + + // Apply the requested rounding to the bits that were shifted out. + + if (round == RoundDirection::Nearest) { + // Perform rounding-to-nearest, tie-to-even. + if (round_bit && (least_significant_bit || sticky_bit)) { + ++mantissa; + } + } else if (round == RoundDirection::Up) { + if (round_bit || sticky_bit) { + ++mantissa; + } + } else /* (round == RoundDirection::Down)*/ { + // Rounding down truncates the magnitude: the round and sticky bits are + // simply discarded, so the mantissa must never be incremented here, + // even when the discarded part is more than half an ULP. } if (mantissa > fputil::FloatProperties::MANTISSA_MASK) { @@ -730,30 +858,31 @@ ++biased_exponent; if (biased_exponent == INF_EXP) { - errno = ERANGE; + output.error = ERANGE; } } if (biased_exponent == 0) { - errno = ERANGE; + output.error = ERANGE; } - *outputMantissa = mantissa & fputil::FloatProperties::MANTISSA_MASK; - *outputExp2 = biased_exponent; + output.mantissa = mantissa & fputil::FloatProperties::MANTISSA_MASK; + output.exponent = biased_exponent; + return output; } // checks if the next 4 characters of the string pointer are the start of a // hexadecimal floating point number. Does not advance the string pointer. 
LIBC_INLINE bool is_float_hex_start(const char *__restrict src, const char decimalPoint) { - if (!(*src == '0' && (*(src + 1) | 32) == 'x')) { + if (!(src[0] == '0' && tolower(src[1]) == 'x')) { return false; } - if (*(src + 2) == decimalPoint) { - return isalnum(*(src + 3)) && b36_char_to_int(*(src + 3)) < 16; - } else { - return isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; + size_t first_digit = 2; + if (src[2] == decimalPoint) { + ++first_digit; } + return isalnum(src[first_digit]) && b36_char_to_int(src[first_digit]) < 16; } // Takes the start of a string representing a decimal float, as well as the @@ -763,22 +892,23 @@ // If the return value is false, then it is assumed that there is no number // here. template -LIBC_INLINE bool +LIBC_INLINE FloatParseReturn decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, - char **__restrict strEnd, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExponent) { + RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; constexpr uint32_t BASE = 10; constexpr char EXPONENT_MARKER = 'e'; - const char *__restrict num_start = src; bool truncated = false; bool seen_digit = false; bool after_decimal = false; BitsType mantissa = 0; int32_t exponent = 0; + size_t index = 0; + + FloatParseReturn output; + // The goal for the first step of parsing is to convert the number in src to // the format mantissa * (base ^ exponent) @@ -786,8 +916,8 @@ const BitsType bitstype_max_div_by_base = cpp::numeric_limits::max() / BASE; while (true) { - if (isdigit(*src)) { - uint32_t digit = *src - '0'; + if (isdigit(src[index])) { + uint32_t digit = src[index] - '0'; seen_digit = true; if (mantissa < bitstype_max_div_by_base) { @@ -802,16 +932,16 @@ ++exponent; } - ++src; + ++index; continue; } - if (*src == DECIMAL_POINT) { + if (src[index] == DECIMAL_POINT) { if (after_decimal) { - break; // this means that *src points to a second decimal point, ending - // the number. 
+ break; // this means that src[index] points to a second decimal point, + // ending the number. } after_decimal = true; - ++src; + ++index; continue; } // The character is neither a digit nor a decimal point. @@ -819,35 +949,35 @@ } if (!seen_digit) - return false; - - if ((*src | 32) == EXPONENT_MARKER) { - if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { - ++src; - char *temp_str_end; - auto result = strtointeger(src, 10); - // TODO: If error, return with error. - temp_str_end = const_cast(src + result.parsed_len); + return output; + + if (tolower(src[index]) == EXPONENT_MARKER) { + if (src[index + 1] == '+' || src[index + 1] == '-' || + isdigit(src[index + 1])) { + ++index; + auto result = strtointeger(src + index, 10); + if (result.has_error()) + output.convert_result.error = result.error; int32_t add_to_exponent = result.value; if (add_to_exponent > 100000) add_to_exponent = 100000; else if (add_to_exponent < -100000) add_to_exponent = -100000; - src = temp_str_end; + index += result.parsed_len; exponent += add_to_exponent; } } - *strEnd = const_cast(src); + output.str_len = index; if (mantissa == 0) { // if we have a 0, then also 0 the exponent. - *outputMantissa = 0; - *outputExponent = 0; + output.convert_result.exponent = 0; + output.convert_result.mantissa = 0; } else { - decimal_exp_to_float(mantissa, exponent, num_start, truncated, - outputMantissa, outputExponent); + output.convert_result = + decimal_exp_to_float({mantissa, exponent}, src, truncated, round); } - return true; + return output; } // Takes the start of a string representing a hexadecimal float, as well as the @@ -857,11 +987,9 @@ // If the return value is false, then it is assumed that there is no number // here. 
template -LIBC_INLINE bool hexadecimal_string_to_float( - const char *__restrict src, const char DECIMAL_POINT, - char **__restrict strEnd, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExponent) { +LIBC_INLINE FloatParseReturn +hexadecimal_string_to_float(const char *__restrict src, + const char DECIMAL_POINT, RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; constexpr uint32_t BASE = 16; constexpr char EXPONENT_MARKER = 'p'; @@ -872,6 +1000,10 @@ BitsType mantissa = 0; int32_t exponent = 0; + size_t index = 0; + + FloatParseReturn output; + // The goal for the first step of parsing is to convert the number in src to // the format mantissa * (base ^ exponent) @@ -879,8 +1011,8 @@ const BitsType bitstype_max_div_by_base = cpp::numeric_limits::max() / BASE; while (true) { - if (isalnum(*src)) { - uint32_t digit = b36_char_to_int(*src); + if (isalnum(src[index])) { + uint32_t digit = b36_char_to_int(src[index]); if (digit < BASE) seen_digit = true; else @@ -896,16 +1028,16 @@ if (!after_decimal) ++exponent; } - ++src; + ++index; continue; } - if (*src == DECIMAL_POINT) { + if (src[index] == DECIMAL_POINT) { if (after_decimal) { - break; // this means that *src points to a second decimal point, ending - // the number. + break; // this means that src[index] points to a second decimal point, + // ending the number. } after_decimal = true; - ++src; + ++index; continue; } // The character is neither a hexadecimal digit nor a decimal point. @@ -913,118 +1045,146 @@ } if (!seen_digit) - return false; + return output; // Convert the exponent from having a base of 16 to having a base of 2. exponent *= 4; - if ((*src | 32) == EXPONENT_MARKER) { - if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { - ++src; - char *temp_str_end; - auto result = strtointeger(src, 10); - // TODO: If error, return error. 
- temp_str_end = const_cast(src + result.parsed_len); + if (tolower(src[index]) == EXPONENT_MARKER) { + if (src[index + 1] == '+' || src[index + 1] == '-' || + isdigit(src[index + 1])) { + ++index; + auto result = strtointeger(src + index, 10); + if (result.has_error()) + output.convert_result.error = result.error; + int32_t add_to_exponent = result.value; if (add_to_exponent > 100000) add_to_exponent = 100000; else if (add_to_exponent < -100000) add_to_exponent = -100000; - src = temp_str_end; + index += result.parsed_len; exponent += add_to_exponent; } } - *strEnd = const_cast(src); + output.str_len = index; if (mantissa == 0) { // if we have a 0, then also 0 the exponent. - *outputMantissa = 0; - *outputExponent = 0; + output.convert_result.exponent = 0; + output.convert_result.mantissa = 0; } else { - binary_exp_to_float(mantissa, exponent, truncated, outputMantissa, - outputExponent); + output.convert_result = + binary_exp_to_float({mantissa, exponent}, truncated, round); } - return true; + return output; } // Takes a pointer to a string and a pointer to a string pointer. This function // is used as the backend for all of the string to float functions. 
template -LIBC_INLINE T strtofloatingpoint(const char *__restrict src, - char **__restrict strEnd) { +LIBC_INLINE StrToNumResult strtofloatingpoint(const char *__restrict src) { using BitsType = typename fputil::FPBits::UIntType; fputil::FPBits result = fputil::FPBits(); - const char *original_src = src; bool seen_digit = false; - src = first_non_whitespace(src); + char sign = '+'; - if (*src == '+' || *src == '-') { - if (*src == '-') { - result.set_sign(true); - } - ++src; + int error = 0; + + ptrdiff_t index = first_non_whitespace(src) - src; + + if (src[index] == '+' || src[index] == '-') { + sign = src[index]; + ++index; + } + + if (sign == '-') { + result.set_sign(true); } static constexpr char DECIMAL_POINT = '.'; static const char *inf_string = "infinity"; static const char *nan_string = "nan"; - // bool truncated = false; - - if (isdigit(*src) || *src == DECIMAL_POINT) { // regular number + if (isdigit(src[index]) || src[index] == DECIMAL_POINT) { // regular number int base = 10; - if (is_float_hex_start(src, DECIMAL_POINT)) { + if (is_float_hex_start(src + index, DECIMAL_POINT)) { base = 16; - src += 2; + index += 2; seen_digit = true; } - char *new_str_end = nullptr; - BitsType output_mantissa = ~0; - uint32_t output_exponent = ~0; - if (base == 16) { - seen_digit = hexadecimal_string_to_float( - src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); - } else { // base is 10 - seen_digit = decimal_string_to_float( - src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); + RoundDirection round_direction = RoundDirection::Nearest; + + switch (fputil::get_round()) { + case FE_TONEAREST: + round_direction = RoundDirection::Nearest; + break; + case FE_UPWARD: + if (sign == '+') { + round_direction = RoundDirection::Up; + } else { + round_direction = RoundDirection::Down; + } + break; + case FE_DOWNWARD: + if (sign == '+') { + round_direction = RoundDirection::Down; + } else { + round_direction = RoundDirection::Up; + } + break; 
+ case FE_TOWARDZERO: + round_direction = RoundDirection::Down; + break; } - if (seen_digit) { - src += new_str_end - src; - result.set_mantissa(output_mantissa); - result.set_unbiased_exponent(output_exponent); + FloatParseReturn parse_result; + if (base == 16) { + parse_result = hexadecimal_string_to_float(src + index, DECIMAL_POINT, + round_direction); + } else { // base is 10 + parse_result = decimal_string_to_float(src + index, DECIMAL_POINT, + round_direction); } - } else if ((*src | 32) == 'n') { // NaN - if ((src[1] | 32) == nan_string[1] && (src[2] | 32) == nan_string[2]) { + seen_digit = parse_result.str_len != 0; + result.set_mantissa(parse_result.convert_result.mantissa); + result.set_unbiased_exponent(parse_result.convert_result.exponent); + index += parse_result.str_len; + error = parse_result.convert_result.error; + } else if (tolower(src[index]) == 'n') { // NaN + if (tolower(src[index + 1]) == nan_string[1] && + tolower(src[index + 2]) == nan_string[2]) { seen_digit = true; - src += 3; + index += 3; BitsType nan_mantissa = 0; // this handles the case of `NaN(n-character-sequence)`, where the // n-character-sequence is made of 0 or more letters and numbers in any // order. - if (*src == '(') { - const char *left_paren = src; - ++src; - while (isalnum(*src)) - ++src; - if (*src == ')') { - ++src; - char *temp_src = 0; - if (isdigit(*(left_paren + 1))) { + if (src[index] == '(') { + size_t left_paren = index; + ++index; + while (isalnum(src[index])) + ++index; + if (src[index] == ')') { + ++index; + if (isdigit(src[left_paren + 1])) { // This is to prevent errors when BitsType is larger than 64 bits, // since strtointeger only supports up to 64 bits. This is actually // more than is required by the specification, which says for the // input type "NAN(n-char-sequence)" that "the meaning of // the n-char sequence is implementation-defined." 
- auto result = strtointeger(left_paren + 1, 0); - // TODO: If error, return error - temp_src = const_cast(left_paren + 1 + result.parsed_len); - nan_mantissa = result.value; - if (*temp_src != ')') + auto strtoint_result = + strtointeger(src + (left_paren + 1), 0); + if (strtoint_result.has_error()) { + error = strtoint_result.error; + } + nan_mantissa = strtoint_result.value; + if (src[left_paren + 1 + strtoint_result.parsed_len] != ')') nan_mantissa = 0; } - } else - src = left_paren; + } else { + index = left_paren; + } } nan_mantissa |= fputil::FloatProperties::QUIET_NAN_MASK; if (result.get_sign()) { @@ -1035,37 +1195,35 @@ result = fputil::FPBits(result.build_quiet_nan(nan_mantissa)); } } - } else if ((*src | 32) == 'i') { // INF - if ((src[1] | 32) == inf_string[1] && (src[2] | 32) == inf_string[2]) { + } else if (tolower(src[index]) == 'i') { // INF + if (tolower(src[index + 1]) == inf_string[1] && + tolower(src[index + 2]) == inf_string[2]) { seen_digit = true; if (result.get_sign()) result = result.neg_inf(); else result = result.inf(); - if ((src[3] | 32) == inf_string[3] && (src[4] | 32) == inf_string[4] && - (src[5] | 32) == inf_string[5] && (src[6] | 32) == inf_string[6] && - (src[7] | 32) == inf_string[7]) { - // if the string is "INFINITY" then strEnd needs to be set to src + 8. - src += 8; + if (tolower(src[index + 3]) == inf_string[3] && + tolower(src[index + 4]) == inf_string[4] && + tolower(src[index + 5]) == inf_string[5] && + tolower(src[index + 6]) == inf_string[6] && + tolower(src[index + 7]) == inf_string[7]) { + // if the string is "INFINITY" then consume 8 characters. + index += 8; } else { - src += 3; + index += 3; } } } if (!seen_digit) { // If there is nothing to actually parse, then return 0. 
- if (strEnd != nullptr) - *strEnd = const_cast(original_src); - return T(0); + return {T(0), 0, error}; } - if (strEnd != nullptr) - *strEnd = const_cast(src); - // This function only does something if T is long double and the platform uses // special 80 bit long doubles. Otherwise it should be inlined out. set_implicit_bit(result); - return T(result); + return {T(result), index, error}; } } // namespace internal diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h --- a/libc/src/stdio/scanf_core/converter_utils.h +++ b/libc/src/stdio/scanf_core/converter_utils.h @@ -90,17 +90,17 @@ LengthModifier lm = to_conv.length_modifier; switch (lm) { case (LengthModifier::l): { - auto value = internal::strtofloatingpoint(str, nullptr); + auto value = internal::strtofloatingpoint(str); *reinterpret_cast(output_ptr) = value; break; } case (LengthModifier::L): { - auto value = internal::strtofloatingpoint(str, nullptr); + auto value = internal::strtofloatingpoint(str); *reinterpret_cast(output_ptr) = value; break; } default: { - auto value = internal::strtofloatingpoint(str, nullptr); + auto value = internal::strtofloatingpoint(str); *reinterpret_cast(output_ptr) = value; break; } diff --git a/libc/src/stdlib/atof.cpp b/libc/src/stdlib/atof.cpp --- a/libc/src/stdlib/atof.cpp +++ b/libc/src/stdlib/atof.cpp @@ -9,11 +9,16 @@ #include "src/stdlib/atof.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(double, atof, (const char *str)) { - return internal::strtofloatingpoint(str, nullptr); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + return result.value; } } // namespace __llvm_libc diff --git a/libc/src/stdlib/strtod.cpp b/libc/src/stdlib/strtod.cpp --- a/libc/src/stdlib/strtod.cpp +++ b/libc/src/stdlib/strtod.cpp @@ -9,12 +9,20 @@ #include "src/stdlib/strtod.h" #include "src/__support/common.h" 
#include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(double, strtod, (const char *__restrict str, char **__restrict str_end)) { - return internal::strtofloatingpoint(str, str_end); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + if (str_end != NULL) + *str_end = const_cast(str + result.parsed_len); + + return result.value; } } // namespace __llvm_libc diff --git a/libc/src/stdlib/strtof.cpp b/libc/src/stdlib/strtof.cpp --- a/libc/src/stdlib/strtof.cpp +++ b/libc/src/stdlib/strtof.cpp @@ -9,12 +9,20 @@ #include "src/stdlib/strtof.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(float, strtof, (const char *__restrict str, char **__restrict str_end)) { - return internal::strtofloatingpoint(str, str_end); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + if (str_end != NULL) + *str_end = const_cast(str + result.parsed_len); + + return result.value; } } // namespace __llvm_libc diff --git a/libc/src/stdlib/strtold.cpp b/libc/src/stdlib/strtold.cpp --- a/libc/src/stdlib/strtold.cpp +++ b/libc/src/stdlib/strtold.cpp @@ -9,12 +9,20 @@ #include "src/stdlib/strtold.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(long double, strtold, (const char *__restrict str, char **__restrict str_end)) { - return internal::strtofloatingpoint(str, str_end); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + if (str_end != NULL) + *str_end = const_cast(str + result.parsed_len); + + return result.value; } } // namespace __llvm_libc diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp --- a/libc/test/src/__support/str_to_float_test.cpp +++ 
b/libc/test/src/__support/str_to_float_test.cpp @@ -25,9 +25,13 @@ 0; uint32_t actual_output_exp2 = 0; - ASSERT_TRUE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actual_output_mantissa, - &actual_output_exp2)); + auto result = __llvm_libc::internal::clinger_fast_path( + {inputMantissa, inputExp10}); + + actual_output_mantissa = result.mantissa; + actual_output_exp2 = result.exponent; + + ASSERT_TRUE(result.error == 0); EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } @@ -36,13 +40,9 @@ void clinger_fast_path_fails_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10) { - typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = - 0; - uint32_t actual_output_exp2 = 0; - - ASSERT_FALSE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actual_output_mantissa, - &actual_output_exp2)); + ASSERT_FALSE( + __llvm_libc::internal::clinger_fast_path({inputMantissa, inputExp10}) + .error == 0); } template @@ -56,9 +56,13 @@ 0; uint32_t actual_output_exp2 = 0; - ASSERT_TRUE(__llvm_libc::internal::eisel_lemire( - inputMantissa, inputExp10, &actual_output_mantissa, - &actual_output_exp2)); + auto result = + __llvm_libc::internal::eisel_lemire({inputMantissa, inputExp10}); + + actual_output_mantissa = result.mantissa; + actual_output_exp2 = result.exponent; + + ASSERT_TRUE(result.error == 0); EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } @@ -74,11 +78,14 @@ uint32_t actual_output_exp2 = 0; errno = 0; - __llvm_libc::internal::simple_decimal_conversion( - numStart, &actual_output_mantissa, &actual_output_exp2); + auto result = __llvm_libc::internal::simple_decimal_conversion(numStart); + + actual_output_mantissa = result.mantissa; + actual_output_exp2 = result.exponent; + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); 
EXPECT_EQ(actual_output_exp2, expectedOutputExp2); - EXPECT_EQ(errno, expectedErrno); + EXPECT_EQ(result.error, expectedErrno); } }; @@ -172,20 +179,17 @@ eisel_lemire_test(2794967654709307188u, 1, 0x183e132bc608c9, 1087); } +// Check the fallback states for the algorithm: TEST_F(LlvmLibcStrToFloatTest, EiselLemireFallbackStates) { - // Check the fallback states for the algorithm: - uint32_t float_output_mantissa = 0; - uint64_t double_output_mantissa = 0; - uint32_t output_exp2 = 0; - // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away // from both of its closest floating point approximations // (12345678901234548736 and 12345678901234550784) - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 12345678901234549760u, 0, &double_output_mantissa, &output_exp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({12345678901234549760u, 0}) + .error == 0); - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 20040229, 0, &float_output_mantissa, &output_exp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({20040229, 0}).error == 0); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicWholeNumbers) { @@ -245,21 +249,27 @@ uint32_t output_exp2 = 0; errno = 0; - __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &float_output_mantissa, &output_exp2); + auto float_result = __llvm_libc::internal::simple_decimal_conversion( + "123456789012345678900"); + float_output_mantissa = float_result.mantissa; + output_exp2 = float_result.exponent; EXPECT_EQ(float_output_mantissa, uint32_t(0xd629d4)); EXPECT_EQ(output_exp2, uint32_t(193)); - EXPECT_EQ(errno, 0); + EXPECT_EQ(float_result.error, 0); uint64_t double_output_mantissa = 0; output_exp2 = 0; errno = 0; - __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &double_output_mantissa, &output_exp2); + auto double_result = __llvm_libc::internal::simple_decimal_conversion( + "123456789012345678900"); + + double_output_mantissa = 
double_result.mantissa; + output_exp2 = double_result.exponent; + EXPECT_EQ(double_output_mantissa, uint64_t(0x1AC53A7E04BCDA)); EXPECT_EQ(output_exp2, uint32_t(1089)); - EXPECT_EQ(errno, 0); + EXPECT_EQ(double_result.error, 0); } #if defined(LONG_DOUBLE_IS_DOUBLE) @@ -299,20 +309,18 @@ } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Fallback) { - uint32_t outputExp2 = 0; - UInt128 quadOutputMantissa = 0; - // This number is halfway between two possible results, and the algorithm // can't determine which is correct. ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 12345678901234567890u, 1, &quadOutputMantissa, &outputExp2)); + {12345678901234567890u, 1}) + .error == 0); // These numbers' exponents are out of range for the current powers of ten // table. - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 1, 1000, &quadOutputMantissa, &outputExp2)); - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 1, -1000, &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({1, 1000}).error == 0); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({1, -1000}).error == 0); } #else // Quad precision long double TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Simple) { @@ -336,11 +344,10 @@ } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Fallback) { - uint32_t outputExp2 = 0; - UInt128 quadOutputMantissa = 0; - - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - (UInt128(0x5ce0e9a56015fec5) << 64) + UInt128(0xaadfa328ae39b333), 1, - &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire( + {(UInt128(0x5ce0e9a56015fec5) << 64) + UInt128(0xaadfa328ae39b333), + 1}) + .error == 0); } #endif