diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -111,8 +111,9 @@ .ctype_utils .high_precision_decimal .str_to_integer + .str_to_num_result .uint128 - libc.include.errno + libc.src.__support.CPP.optional libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits libc.src.__support.builtin_wrappers diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h --- a/libc/src/__support/high_precision_decimal.h +++ b/libc/src/__support/high_precision_decimal.h @@ -21,6 +21,10 @@ char const *power_of_five; }; +// This is used in both this file and in the main str_to_float.h. +// TODO: Figure out where to put this. +enum class RoundDirection { Up, Down, Nearest }; + // This is based on the HPD data structure described as part of the Simple // Decimal Conversion algorithm by Nigel Tao, described at this link: // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html @@ -111,12 +115,23 @@ uint8_t digits[MAX_NUM_DIGITS]; private: - bool should_round_up(int32_t roundToDigit) { + bool should_round_up(int32_t roundToDigit, RoundDirection round) { if (roundToDigit < 0 || static_cast(roundToDigit) >= this->num_digits) { return false; } + // The above condition handles all cases where all of the trailing digits + // are zero. In that case, if the rounding mode is up, then this number + // should be rounded up. Similarly, if the rounding mode is down, then it + // should always round down. + if (round == RoundDirection::Up) { + return true; + } else if (round == RoundDirection::Down) { + return false; + } + // Else round to nearest. + // If we're right in the middle and there are no extra digits if (this->digits[roundToDigit] == 5 && static_cast(roundToDigit + 1) == this->num_digits) { @@ -357,7 +372,8 @@ // Round the number represented to the closest value of unsigned int type T. // This is done ignoring overflow. - template T round_to_integer_type() { + template + T round_to_integer_type(RoundDirection round = RoundDirection::Nearest) { T result = 0; uint32_t cur_digit = 0; @@ -372,10 +388,7 @@ result *= 10; ++cur_digit; } - if (this->should_round_up(this->decimal_point)) { - ++result; - } - return result; + return result + this->should_round_up(this->decimal_point, round); } // Extra functions for testing. diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -10,6 +10,8 @@ #define LIBC_SRC_SUPPORT_STR_TO_FLOAT_H #include "src/__support/CPP/limits.h" +#include "src/__support/CPP/optional.h" +#include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/UInt128.h" #include "src/__support/builtin_wrappers.h" @@ -18,11 +20,22 @@ #include "src/__support/detailed_powers_of_ten.h" #include "src/__support/high_precision_decimal.h" #include "src/__support/str_to_integer.h" +#include "src/__support/str_to_num_result.h" #include namespace __llvm_libc { namespace internal { +template struct ExpandedFloat { + typename fputil::FPBits::UIntType mantissa; + int32_t exponent; +}; + +template struct FloatConvertReturn { + ExpandedFloat num = {0, 0}; + int error = 0; +}; + template LIBC_INLINE uint32_t leading_zeroes(T inputNumber) { constexpr uint32_t BITS_IN_T = sizeof(T) * 8; if (inputNumber == 0) { @@ -91,23 +104,26 @@ // (https://github.com/golang/go/blob/release-branch.go1.16/src/strconv/eisel_lemire.go#L25) // for some optimizations as well as handling 32 bit floats. template -LIBC_INLINE bool -eisel_lemire(typename fputil::FPBits::UIntType mantissa, int32_t exp10, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE cpp::optional> +eisel_lemire(ExpandedFloat init_num, + RoundDirection round = RoundDirection::Nearest) { using BitsType = typename fputil::FPBits::UIntType; + + BitsType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; if (sizeof(T) > 8) { // This algorithm cannot handle anything longer than a // double, so we skip straight to the fallback. - return false; + return cpp::nullopt; } // Exp10 Range if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { - return false; + return cpp::nullopt; } // Normalization @@ -146,7 +162,7 @@ if ((high64(second_approx) & halfway_constant) == halfway_constant && low64(second_approx) + 1 == 0 && low64(low_bits) + mantissa < mantissa) { - return false; + return cpp::nullopt; } final_approx = second_approx; } else { @@ -162,15 +178,28 @@ (fputil::FloatProperties::MANTISSA_WIDTH + 3))); exp2 -= static_cast(1 ^ msb); // same as !msb - // Half-way ambiguity - if (low64(final_approx) == 0 && - (high64(final_approx) & halfway_constant) == 0 && - (final_mantissa & 3) == 1) { - return false; + if (round == RoundDirection::Nearest) { + // Half-way ambiguity + if (low64(final_approx) == 0 && + (high64(final_approx) & halfway_constant) == 0 && + (final_mantissa & 3) == 1) { + return cpp::nullopt; + } + + // Round to even. + final_mantissa += final_mantissa & 1; + + } else if (round == RoundDirection::Up) { + // If any of the bits being rounded away are non-zero, then round up. + if (low64(final_approx) > 0 || + (high64(final_approx) & halfway_constant) > 0) { + // Add two since the last current lowest bit is about to be shifted away. + final_mantissa += 2; + } } + // else round down, which has no effect. // From 54 to 53 bits for doubles and 25 to 24 bits for floats - final_mantissa += final_mantissa & 1; final_mantissa >>= 1; if ((final_mantissa >> (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { @@ -181,21 +210,25 @@ // The if block is equivalent to (but has fewer branches than): // if exp2 <= 0 || exp2 >= 0x7FF { etc } if (exp2 - 1 >= (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { - return false; + return cpp::nullopt; } - *outputMantissa = final_mantissa; - *outputExp2 = exp2; - return true; + ExpandedFloat output; + output.mantissa = final_mantissa; + output.exponent = exp2; + return output; } #if !defined(LONG_DOUBLE_IS_DOUBLE) template <> -LIBC_INLINE bool eisel_lemire( - typename fputil::FPBits::UIntType mantissa, int32_t exp10, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE cpp::optional> +eisel_lemire(ExpandedFloat init_num, + RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; + + BitsType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; // Exp10 Range @@ -210,7 +243,7 @@ // out to the full long double range. if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { - return false; + return cpp::nullopt; } // Normalization @@ -258,7 +291,7 @@ if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && final_approx_lower + mantissa < mantissa) { - return false; + return cpp::nullopt; } // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats @@ -269,15 +302,27 @@ (fputil::FloatProperties::MANTISSA_WIDTH + 3)); exp2 -= static_cast(1 ^ msb); // same as !msb - // Half-way ambiguity - if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && - (final_mantissa & 3) == 1) { - return false; + if (round == RoundDirection::Nearest) { + // Half-way ambiguity + if (final_approx_lower == 0 && + (final_approx_upper & HALFWAY_CONSTANT) == 0 && + (final_mantissa & 3) == 1) { + return cpp::nullopt; + } + // Round to even. + final_mantissa += final_mantissa & 1; + + } else if (round == RoundDirection::Up) { + // If any of the bits being rounded away are non-zero, then round up. + if (final_approx_lower > 0 || (final_approx_upper & HALFWAY_CONSTANT) > 0) { + // Add two since the last current lowest bit is about to be shifted away. + final_mantissa += 2; + } } + // else round down, which has no effect. // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit // floats - final_mantissa += final_mantissa & 1; final_mantissa >>= 1; if ((final_mantissa >> (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { @@ -289,12 +334,13 @@ // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } if (exp2 - 1 >= (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { - return false; + return cpp::nullopt; } - *outputMantissa = final_mantissa; - *outputExp2 = exp2; - return true; + ExpandedFloat output; + output.mantissa = final_mantissa; + output.exponent = exp2; + return output; } #endif @@ -312,18 +358,18 @@ // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html template -LIBC_INLINE void +LIBC_INLINE FloatConvertReturn simple_decimal_conversion(const char *__restrict numStart, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { + RoundDirection round = RoundDirection::Nearest) { int32_t exp2 = 0; HighPrecisionDecimal hpd = HighPrecisionDecimal(numStart); + FloatConvertReturn output; + if (hpd.get_num_digits() == 0) { - *outputMantissa = 0; - *outputExp2 = 0; - return; + output.num = {0, 0}; + return output; } // If the exponent is too large and can't be represented in this size of @@ -331,20 +377,18 @@ if (hpd.get_decimal_point() > 0 && exp10_to_exp2(hpd.get_decimal_point() - 1) > static_cast(fputil::FloatProperties::EXPONENT_BIAS)) { - *outputMantissa = 0; - *outputExp2 = fputil::FPBits::MAX_EXPONENT; - errno = ERANGE; - return; + output.num = {0, fputil::FPBits::MAX_EXPONENT}; + output.error = ERANGE; + return output; } // If the exponent is too small even for a subnormal, return 0. if (hpd.get_decimal_point() < 0 && exp10_to_exp2(-hpd.get_decimal_point()) > static_cast(fputil::FloatProperties::EXPONENT_BIAS + fputil::FloatProperties::MANTISSA_WIDTH)) { - *outputMantissa = 0; - *outputExp2 = 0; - errno = ERANGE; - return; + output.num = {0, 0}; + output.error = ERANGE; + return output; } // Right shift until the number is smaller than 1. @@ -384,10 +428,9 @@ // Handle the exponent being too large (and return inf). if (exp2 >= fputil::FPBits::MAX_EXPONENT) { - *outputMantissa = 0; - *outputExp2 = fputil::FPBits::MAX_EXPONENT; - errno = ERANGE; - return; + output.num = {0, fputil::FPBits::MAX_EXPONENT}; + output.error = ERANGE; + return output; } // Shift left to fill the mantissa @@ -406,7 +449,7 @@ // between 1 and 2. hpd.shift(-1); final_mantissa = - hpd.round_to_integer_type::UIntType>(); + hpd.round_to_integer_type::UIntType>(round); // Check if by shifting right we've caused this to round to a normal number. if ((final_mantissa >> fputil::FloatProperties::MANTISSA_WIDTH) != 0) { @@ -424,16 +467,16 @@ // INF. If this is the case, then finalMantissa and exp2 are already the // correct values for an INF result. if (exp2 >= fputil::FPBits::MAX_EXPONENT) { - errno = ERANGE; // NOLINT + output.error = ERANGE; } } if (exp2 == 0) { - errno = ERANGE; + output.error = ERANGE; } - *outputMantissa = final_mantissa; - *outputExp2 = exp2; + output.num = {final_mantissa, exp2}; + return output; } // This class is used for templating the constants for Clinger's Fast Path, @@ -510,12 +553,15 @@ // exponents, but handles them quickly. This is an implementation of Clinger's // Fast Path, as described above. template -LIBC_INLINE bool -clinger_fast_path(typename fputil::FPBits::UIntType mantissa, int32_t exp10, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE cpp::optional> +clinger_fast_path(ExpandedFloat init_num, + RoundDirection round = RoundDirection::Nearest) { + + typename fputil::FPBits::UIntType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + if (mantissa >> fputil::FloatProperties::MANTISSA_WIDTH > 0) { - return false; + return cpp::nullopt; } fputil::FPBits result; @@ -527,7 +573,7 @@ if (exp10 > 0) { if (exp10 > ClingerConsts::EXACT_POWERS_OF_TEN + ClingerConsts::DIGITS_IN_MANTISSA) { - return false; + return cpp::nullopt; } if (exp10 > ClingerConsts::EXACT_POWERS_OF_TEN) { float_mantissa = float_mantissa * @@ -536,20 +582,53 @@ exp10 = ClingerConsts::EXACT_POWERS_OF_TEN; } if (float_mantissa > ClingerConsts::MAX_EXACT_INT) { - return false; + return cpp::nullopt; } result = fputil::FPBits(float_mantissa * ClingerConsts::POWERS_OF_TEN_ARRAY[exp10]); } else if (exp10 < 0) { if (-exp10 > ClingerConsts::EXACT_POWERS_OF_TEN) { - return false; + return cpp::nullopt; } result = fputil::FPBits(float_mantissa / ClingerConsts::POWERS_OF_TEN_ARRAY[-exp10]); } - *outputMantissa = result.get_mantissa(); - *outputExp2 = result.get_unbiased_exponent(); - return true; + + // If the rounding mode is not nearest, then the sign of the number may affect + // the result. To make sure the rounding mode is respected properly, the + // calculation is redone with a negative result, and the rounding mode is used + // to select the correct result. + if (round != RoundDirection::Nearest) { + fputil::FPBits negative_result; + // I'm 99% sure this will break under fast math optimizations. + negative_result = fputil::FPBits( + (-float_mantissa) * ClingerConsts::POWERS_OF_TEN_ARRAY[exp10]); + + // If the results are equal, then we don't need to use the rounding mode. + if (T(result) != -T(negative_result)) { + fputil::FPBits lower_result; + fputil::FPBits higher_result; + + if (T(result) < -T(negative_result)) { + lower_result = result; + higher_result = negative_result; + } else { + lower_result = negative_result; + higher_result = result; + } + + if (round == RoundDirection::Up) { + result = higher_result; + } else { + result = lower_result; + } + } + } + + ExpandedFloat output; + output.mantissa = result.get_mantissa(); + output.exponent = result.get_unbiased_exponent(); + return output; } // The upper bound is the highest base-10 exponent that could possibly give a @@ -593,63 +672,74 @@ // accuracy. The resulting mantissa and exponent are placed in outputMantissa // and outputExp2. template -LIBC_INLINE void -decimal_exp_to_float(typename fputil::FPBits::UIntType mantissa, - int32_t exp10, const char *__restrict numStart, - bool truncated, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn +decimal_exp_to_float(ExpandedFloat init_num, const char *__restrict numStart, + bool truncated, RoundDirection round) { + + typename fputil::FPBits::UIntType mantissa = init_num.mantissa; + int32_t exp10 = init_num.exponent; + + FloatConvertReturn output; + cpp::optional> opt_output; + // If the exponent is too large and can't be represented in this size of // float, return inf. These bounds are relatively loose, but are mostly // serving as a first pass. Some close numbers getting through is okay. if (exp10 > get_upper_bound()) { - *outputMantissa = 0; - *outputExp2 = fputil::FPBits::MAX_EXPONENT; - errno = ERANGE; - return; + output.num = {0, fputil::FPBits::MAX_EXPONENT}; + output.error = ERANGE; + return output; } // If the exponent is too small even for a subnormal, return 0. if (exp10 < get_lower_bound()) { - *outputMantissa = 0; - *outputExp2 = 0; - errno = ERANGE; - return; + output.num = {0, 0}; + output.error = ERANGE; + return output; } + // Clinger's Fast Path and Eisel-Lemire can't set errno, but they can fail. + // For this reason the "error" field in their return values is used to + // represent whether they've failed as opposed to the errno value. Any + // non-zero value represents a failure. + #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH if (!truncated) { - if (clinger_fast_path(mantissa, exp10, outputMantissa, outputExp2)) { - return; + opt_output = clinger_fast_path(init_num, round); + // If the algorithm succeeded the error will be 0, else it will be a + // non-zero number. + if (opt_output.has_value()) { + return {opt_output.value(), 0}; } } #endif // LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE // Try Eisel-Lemire - if (eisel_lemire(mantissa, exp10, outputMantissa, outputExp2)) { + opt_output = eisel_lemire(init_num, round); + if (opt_output.has_value()) { if (!truncated) { - return; + return {opt_output.value(), 0}; } // If the mantissa is truncated, then the result may be off by the LSB, so // check if rounding the mantissa up changes the result. If not, then it's // safe, else use the fallback. - typename fputil::FPBits::UIntType first_mantissa = *outputMantissa; - uint32_t first_exp2 = *outputExp2; - if (eisel_lemire(mantissa + 1, exp10, outputMantissa, outputExp2)) { - if (*outputMantissa == first_mantissa && *outputExp2 == first_exp2) { - return; + auto secound_output = eisel_lemire({mantissa + 1, exp10}, round); + if (secound_output.has_value()) { + if (opt_output->mantissa == secound_output->mantissa && + opt_output->exponent == secound_output->exponent) { + return {opt_output.value(), 0}; } } } #endif // LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE #ifndef LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION - simple_decimal_conversion(numStart, outputMantissa, outputExp2); + output = simple_decimal_conversion(numStart, round); #else #warning "Simple decimal conversion is disabled, result may not be correct." #endif // LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION - return; + return output; } // Takes a mantissa and base 2 exponent and converts it into its closest @@ -657,13 +747,16 @@ // form, this is mostly just shifting and rounding. This is used for hexadecimal // numbers since a base 16 exponent multiplied by 4 is the base 2 exponent. template -LIBC_INLINE void -binary_exp_to_float(typename fputil::FPBits::UIntType mantissa, int32_t exp2, - bool truncated, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExp2) { +LIBC_INLINE FloatConvertReturn binary_exp_to_float(ExpandedFloat init_num, + bool truncated, + RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; + BitsType mantissa = init_num.mantissa; + int32_t exp2 = init_num.exponent; + + FloatConvertReturn output; + // This is the number of leading zeroes a properly normalized float of type T // should have. constexpr int32_t NUMBITS = sizeof(BitsType) * 8; @@ -684,10 +777,9 @@ // Handle numbers that're too large and get squashed to inf if (biased_exponent >= INF_EXP) { // This indicates an overflow, so we make the result INF and set errno. - *outputExp2 = (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 1; - *outputMantissa = 0; - errno = ERANGE; - return; + output.num = {0, (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 1}; + output.error = ERANGE; + return output; } uint32_t amount_to_shift_right = @@ -700,10 +792,9 @@ if (amount_to_shift_right > NUMBITS) { // Return 0 if the exponent is too small. - *outputMantissa = 0; - *outputExp2 = 0; - errno = ERANGE; - return; + output.num = {0, 0}; + output.error = ERANGE; + return output; } } @@ -720,9 +811,22 @@ mantissa = 0; } bool least_significant_bit = mantissa & BitsType(1); - // Perform rounding-to-nearest, tie-to-even. - if (round_bit && (least_significant_bit || sticky_bit)) { - ++mantissa; + + // TODO: check that this rounding behavior is correct. + + if (round == RoundDirection::Nearest) { + // Perform rounding-to-nearest, tie-to-even. + if (round_bit && (least_significant_bit || sticky_bit)) { + ++mantissa; + } + } else if (round == RoundDirection::Up) { + if (round_bit || sticky_bit) { + ++mantissa; + } + } else /* (round == RoundDirection::Down)*/ { + if (round_bit && sticky_bit) { + ++mantissa; + } } if (mantissa > fputil::FloatProperties::MANTISSA_MASK) { @@ -730,30 +834,31 @@ ++biased_exponent; if (biased_exponent == INF_EXP) { - errno = ERANGE; + output.error = ERANGE; } } if (biased_exponent == 0) { - errno = ERANGE; + output.error = ERANGE; } - *outputMantissa = mantissa & fputil::FloatProperties::MANTISSA_MASK; - *outputExp2 = biased_exponent; + output.num = {mantissa & fputil::FloatProperties::MANTISSA_MASK, + biased_exponent}; + return output; } // checks if the next 4 characters of the string pointer are the start of a // hexadecimal floating point number. Does not advance the string pointer. LIBC_INLINE bool is_float_hex_start(const char *__restrict src, const char decimalPoint) { - if (!(*src == '0' && (*(src + 1) | 32) == 'x')) { + if (!(src[0] == '0' && tolower(src[1]) == 'x')) { return false; } - if (*(src + 2) == decimalPoint) { - return isalnum(*(src + 3)) && b36_char_to_int(*(src + 3)) < 16; - } else { - return isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16; + size_t first_digit = 2; + if (src[2] == decimalPoint) { + ++first_digit; } + return isalnum(src[first_digit]) && b36_char_to_int(src[first_digit]) < 16; } // Takes the start of a string representing a decimal float, as well as the @@ -763,22 +868,23 @@ // If the return value is false, then it is assumed that there is no number // here. template -LIBC_INLINE bool +LIBC_INLINE StrToNumResult> decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, - char **__restrict strEnd, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExponent) { + RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; constexpr uint32_t BASE = 10; constexpr char EXPONENT_MARKER = 'e'; - const char *__restrict num_start = src; bool truncated = false; bool seen_digit = false; bool after_decimal = false; BitsType mantissa = 0; int32_t exponent = 0; + size_t index = 0; + + StrToNumResult> output({0, 0}); + // The goal for the first step of parsing is to convert the number in src to // the format mantissa * (base ^ exponent) @@ -786,8 +892,8 @@ const BitsType bitstype_max_div_by_base = cpp::numeric_limits::max() / BASE; while (true) { - if (isdigit(*src)) { - uint32_t digit = *src - '0'; + if (isdigit(src[index])) { + uint32_t digit = src[index] - '0'; seen_digit = true; if (mantissa < bitstype_max_div_by_base) { @@ -802,16 +908,16 @@ ++exponent; } - ++src; + ++index; continue; } - if (*src == DECIMAL_POINT) { + if (src[index] == DECIMAL_POINT) { if (after_decimal) { - break; // this means that *src points to a second decimal point, ending - // the number. + break; // this means that src[index] points to a second decimal point, + // ending the number. } after_decimal = true; - ++src; + ++index; continue; } // The character is neither a digit nor a decimal point. @@ -819,35 +925,36 @@ } if (!seen_digit) - return false; - - if ((*src | 32) == EXPONENT_MARKER) { - if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { - ++src; - char *temp_str_end; - auto result = strtointeger(src, 10); - // TODO: If error, return with error. - temp_str_end = const_cast(src + result.parsed_len); + return output; + + if (tolower(src[index]) == EXPONENT_MARKER) { + if (src[index + 1] == '+' || src[index + 1] == '-' || + isdigit(src[index + 1])) { + ++index; + auto result = strtointeger(src + index, 10); + if (result.has_error()) + output.error = result.error; int32_t add_to_exponent = result.value; if (add_to_exponent > 100000) add_to_exponent = 100000; else if (add_to_exponent < -100000) add_to_exponent = -100000; - src = temp_str_end; + index += result.parsed_len; exponent += add_to_exponent; } } - *strEnd = const_cast(src); + output.parsed_len = index; if (mantissa == 0) { // if we have a 0, then also 0 the exponent. - *outputMantissa = 0; - *outputExponent = 0; + output.value = {0, 0}; } else { - decimal_exp_to_float(mantissa, exponent, num_start, truncated, - outputMantissa, outputExponent); + auto temp = + decimal_exp_to_float({mantissa, exponent}, src, truncated, round); + output.value = temp.num; + output.error = temp.error; } - return true; + return output; } // Takes the start of a string representing a hexadecimal float, as well as the @@ -857,11 +964,9 @@ // If the return value is false, then it is assumed that there is no number // here. template -LIBC_INLINE bool hexadecimal_string_to_float( - const char *__restrict src, const char DECIMAL_POINT, - char **__restrict strEnd, - typename fputil::FPBits::UIntType *outputMantissa, - uint32_t *outputExponent) { +LIBC_INLINE StrToNumResult> +hexadecimal_string_to_float(const char *__restrict src, + const char DECIMAL_POINT, RoundDirection round) { using BitsType = typename fputil::FPBits::UIntType; constexpr uint32_t BASE = 16; constexpr char EXPONENT_MARKER = 'p'; @@ -872,6 +977,10 @@ BitsType mantissa = 0; int32_t exponent = 0; + size_t index = 0; + + StrToNumResult> output({0, 0}); + // The goal for the first step of parsing is to convert the number in src to // the format mantissa * (base ^ exponent) @@ -879,8 +988,8 @@ const BitsType bitstype_max_div_by_base = cpp::numeric_limits::max() / BASE; while (true) { - if (isalnum(*src)) { - uint32_t digit = b36_char_to_int(*src); + if (isalnum(src[index])) { + uint32_t digit = b36_char_to_int(src[index]); if (digit < BASE) seen_digit = true; else @@ -896,16 +1005,16 @@ if (!after_decimal) ++exponent; } - ++src; + ++index; continue; } - if (*src == DECIMAL_POINT) { + if (src[index] == DECIMAL_POINT) { if (after_decimal) { - break; // this means that *src points to a second decimal point, ending - // the number. + break; // this means that src[index] points to a second decimal point, + // ending the number. } after_decimal = true; - ++src; + ++index; continue; } // The character is neither a hexadecimal digit nor a decimal point. @@ -913,118 +1022,147 @@ } if (!seen_digit) - return false; + return output; // Convert the exponent from having a base of 16 to having a base of 2. exponent *= 4; - if ((*src | 32) == EXPONENT_MARKER) { - if (*(src + 1) == '+' || *(src + 1) == '-' || isdigit(*(src + 1))) { - ++src; - char *temp_str_end; - auto result = strtointeger(src, 10); - // TODO: If error, return error. - temp_str_end = const_cast(src + result.parsed_len); + if (tolower(src[index]) == EXPONENT_MARKER) { + if (src[index + 1] == '+' || src[index + 1] == '-' || + isdigit(src[index + 1])) { + ++index; + auto result = strtointeger(src + index, 10); + if (result.has_error()) + output.error = result.error; + int32_t add_to_exponent = result.value; if (add_to_exponent > 100000) add_to_exponent = 100000; else if (add_to_exponent < -100000) add_to_exponent = -100000; - src = temp_str_end; + index += result.parsed_len; exponent += add_to_exponent; } } - *strEnd = const_cast(src); + output.parsed_len = index; if (mantissa == 0) { // if we have a 0, then also 0 the exponent. - *outputMantissa = 0; - *outputExponent = 0; + output.value.exponent = 0; + output.value.mantissa = 0; } else { - binary_exp_to_float(mantissa, exponent, truncated, outputMantissa, - outputExponent); + auto temp = binary_exp_to_float({mantissa, exponent}, truncated, round); + output.error = temp.error; + output.value = temp.num; } - return true; + return output; } // Takes a pointer to a string and a pointer to a string pointer. This function // is used as the backend for all of the string to float functions. template -LIBC_INLINE T strtofloatingpoint(const char *__restrict src, - char **__restrict strEnd) { +LIBC_INLINE StrToNumResult strtofloatingpoint(const char *__restrict src) { using BitsType = typename fputil::FPBits::UIntType; fputil::FPBits result = fputil::FPBits(); - const char *original_src = src; bool seen_digit = false; - src = first_non_whitespace(src); + char sign = '+'; - if (*src == '+' || *src == '-') { - if (*src == '-') { - result.set_sign(true); - } - ++src; + int error = 0; + + ptrdiff_t index = first_non_whitespace(src) - src; + + if (src[index] == '+' || src[index] == '-') { + sign = src[index]; + ++index; + } + + if (sign == '-') { + result.set_sign(true); } static constexpr char DECIMAL_POINT = '.'; static const char *inf_string = "infinity"; static const char *nan_string = "nan"; - // bool truncated = false; - - if (isdigit(*src) || *src == DECIMAL_POINT) { // regular number + if (isdigit(src[index]) || src[index] == DECIMAL_POINT) { // regular number int base = 10; - if (is_float_hex_start(src, DECIMAL_POINT)) { + if (is_float_hex_start(src + index, DECIMAL_POINT)) { base = 16; - src += 2; + index += 2; seen_digit = true; } - char *new_str_end = nullptr; - BitsType output_mantissa = ~0; - uint32_t output_exponent = ~0; - if (base == 16) { - seen_digit = hexadecimal_string_to_float( - src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); - } else { // base is 10 - seen_digit = decimal_string_to_float( - src, DECIMAL_POINT, &new_str_end, &output_mantissa, &output_exponent); + RoundDirection round_direction = RoundDirection::Nearest; + + switch (fputil::get_round()) { + case FE_TONEAREST: + round_direction = RoundDirection::Nearest; + break; + case FE_UPWARD: + if (sign == '+') { + round_direction = RoundDirection::Up; + } else { + round_direction = RoundDirection::Down; + } + break; + case FE_DOWNWARD: + if (sign == '+') { + round_direction = RoundDirection::Down; + } else { + round_direction = RoundDirection::Up; + } + break; + case FE_TOWARDZERO: + round_direction = RoundDirection::Down; + break; } - if (seen_digit) { - src += new_str_end - src; - result.set_mantissa(output_mantissa); - result.set_unbiased_exponent(output_exponent); + StrToNumResult> parse_result({0, 0}); + if (base == 16) { + parse_result = hexadecimal_string_to_float(src + index, DECIMAL_POINT, + round_direction); + } else { // base is 10 + parse_result = decimal_string_to_float(src + index, DECIMAL_POINT, + round_direction); } - } else if ((*src | 32) == 'n') { // NaN - if ((src[1] | 32) == nan_string[1] && (src[2] | 32) == nan_string[2]) { + seen_digit = parse_result.parsed_len != 0; + result.set_mantissa(parse_result.value.mantissa); + result.set_unbiased_exponent(parse_result.value.exponent); + index += parse_result.parsed_len; + error = parse_result.error; + } else if (tolower(src[index]) == 'n') { // NaN + if (tolower(src[index + 1]) == nan_string[1] && + tolower(src[index + 2]) == nan_string[2]) { seen_digit = true; - src += 3; + index += 3; BitsType nan_mantissa = 0; // this handles the case of `NaN(n-character-sequence)`, where the // n-character-sequence is made of 0 or more letters and numbers in any // order. - if (*src == '(') { - const char *left_paren = src; - ++src; - while (isalnum(*src)) - ++src; - if (*src == ')') { - ++src; - char *temp_src = 0; - if (isdigit(*(left_paren + 1))) { + if (src[index] == '(') { + size_t left_paren = index; + ++index; + while (isalnum(src[index])) + ++index; + if (src[index] == ')') { + ++index; + if (isdigit(src[left_paren + 1])) { // This is to prevent errors when BitsType is larger than 64 bits, // since strtointeger only supports up to 64 bits. This is actually // more than is required by the specification, which says for the // input type "NAN(n-char-sequence)" that "the meaning of // the n-char sequence is implementation-defined." - auto result = strtointeger(left_paren + 1, 0); - // TODO: If error, return error - temp_src = const_cast(left_paren + 1 + result.parsed_len); - nan_mantissa = result.value; - if (*temp_src != ')') + auto strtoint_result = + strtointeger(src + (left_paren + 1), 0); + if (strtoint_result.has_error()) { + error = strtoint_result.error; + } + nan_mantissa = strtoint_result.value; + if (src[left_paren + 1 + strtoint_result.parsed_len] != ')') nan_mantissa = 0; } - } else - src = left_paren; + } else { + index = left_paren; + } } nan_mantissa |= fputil::FloatProperties::QUIET_NAN_MASK; if (result.get_sign()) { @@ -1035,37 +1173,35 @@ result = fputil::FPBits(result.build_quiet_nan(nan_mantissa)); } } - } else if ((*src | 32) == 'i') { // INF - if ((src[1] | 32) == inf_string[1] && (src[2] | 32) == inf_string[2]) { + } else if (tolower(src[index]) == 'i') { // INF + if (tolower(src[index + 1]) == inf_string[1] && + tolower(src[index + 2]) == inf_string[2]) { seen_digit = true; if (result.get_sign()) result = result.neg_inf(); else result = result.inf(); - if ((src[3] | 32) == inf_string[3] && (src[4] | 32) == inf_string[4] && - (src[5] | 32) == inf_string[5] && (src[6] | 32) == inf_string[6] && - (src[7] | 32) == inf_string[7]) { - // if the string is "INFINITY" then strEnd needs to be set to src + 8. - src += 8; + if (tolower(src[index + 3]) == inf_string[3] && + tolower(src[index + 4]) == inf_string[4] && + tolower(src[index + 5]) == inf_string[5] && + tolower(src[index + 6]) == inf_string[6] && + tolower(src[index + 7]) == inf_string[7]) { + // if the string is "INFINITY" then consume 8 characters. + index += 8; } else { - src += 3; + index += 3; } } } if (!seen_digit) { // If there is nothing to actually parse, then return 0. - if (strEnd != nullptr) - *strEnd = const_cast(original_src); - return T(0); + return {T(0), 0, error}; } - if (strEnd != nullptr) - *strEnd = const_cast(src); - // This function only does something if T is long double and the platform uses // special 80 bit long doubles. Otherwise it should be inlined out. set_implicit_bit(result); - return T(result); + return {T(result), index, error}; } } // namespace internal diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h --- a/libc/src/stdio/scanf_core/converter_utils.h +++ b/libc/src/stdio/scanf_core/converter_utils.h @@ -90,17 +90,17 @@ LengthModifier lm = to_conv.length_modifier; switch (lm) { case (LengthModifier::l): { - auto value = internal::strtofloatingpoint(str, nullptr); + auto value = internal::strtofloatingpoint(str); *reinterpret_cast(output_ptr) = value; break; } case (LengthModifier::L): { - auto value = internal::strtofloatingpoint(str, nullptr); + auto value = internal::strtofloatingpoint(str); *reinterpret_cast(output_ptr) = value; break; } default: { - auto value = internal::strtofloatingpoint(str, nullptr); + auto value = internal::strtofloatingpoint(str); *reinterpret_cast(output_ptr) = value; break; } diff --git a/libc/src/stdlib/atof.cpp b/libc/src/stdlib/atof.cpp --- a/libc/src/stdlib/atof.cpp +++ b/libc/src/stdlib/atof.cpp @@ -9,11 +9,16 @@ #include "src/stdlib/atof.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(double, atof, (const char *str)) { - return internal::strtofloatingpoint(str, nullptr); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + return result.value; } } // namespace __llvm_libc diff --git a/libc/src/stdlib/strtod.cpp b/libc/src/stdlib/strtod.cpp --- a/libc/src/stdlib/strtod.cpp +++ b/libc/src/stdlib/strtod.cpp @@ -9,12 +9,20 @@ #include "src/stdlib/strtod.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(double, strtod, (const char *__restrict str, char **__restrict str_end)) { - return internal::strtofloatingpoint(str, str_end); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + if (str_end != NULL) + *str_end = const_cast(str + result.parsed_len); + + return result.value; } } // namespace __llvm_libc diff --git a/libc/src/stdlib/strtof.cpp b/libc/src/stdlib/strtof.cpp --- a/libc/src/stdlib/strtof.cpp +++ b/libc/src/stdlib/strtof.cpp @@ -9,12 +9,20 @@ #include "src/stdlib/strtof.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(float, strtof, (const char *__restrict str, char **__restrict str_end)) { - return internal::strtofloatingpoint(str, str_end); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + if (str_end != NULL) + *str_end = const_cast(str + result.parsed_len); + + return result.value; } } // namespace __llvm_libc diff --git a/libc/src/stdlib/strtold.cpp b/libc/src/stdlib/strtold.cpp --- a/libc/src/stdlib/strtold.cpp +++ b/libc/src/stdlib/strtold.cpp @@ -9,12 +9,20 @@ #include "src/stdlib/strtold.h" #include "src/__support/common.h" #include "src/__support/str_to_float.h" +#include namespace __llvm_libc { LLVM_LIBC_FUNCTION(long double, strtold, (const char *__restrict str, char **__restrict str_end)) { - return internal::strtofloatingpoint(str, str_end); + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + errno = result.error; + + if (str_end != NULL) + *str_end = const_cast(str + result.parsed_len); + + return result.value; } } // namespace __llvm_libc diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -25,9 +25,14 @@ 0; uint32_t actual_output_exp2 = 0; - ASSERT_TRUE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actual_output_mantissa, - &actual_output_exp2)); + auto result = __llvm_libc::internal::clinger_fast_path( + {inputMantissa, inputExp10}); + + ASSERT_TRUE(result.has_value()); + + actual_output_mantissa = result->mantissa; + actual_output_exp2 = result->exponent; + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } @@ -36,13 +41,9 @@ void clinger_fast_path_fails_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10) { - typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = - 0; - uint32_t actual_output_exp2 = 0; - - ASSERT_FALSE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actual_output_mantissa, - &actual_output_exp2)); + ASSERT_FALSE( + __llvm_libc::internal::clinger_fast_path({inputMantissa, inputExp10}) + .has_value()); } template @@ -56,9 +57,14 @@ 0; uint32_t actual_output_exp2 = 0; - ASSERT_TRUE(__llvm_libc::internal::eisel_lemire( - inputMantissa, inputExp10, &actual_output_mantissa, - &actual_output_exp2)); + auto result = + __llvm_libc::internal::eisel_lemire({inputMantissa, inputExp10}); + + ASSERT_TRUE(result.has_value()); + + actual_output_mantissa = result->mantissa; + actual_output_exp2 = result->exponent; + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } @@ -74,11 +80,14 @@ uint32_t actual_output_exp2 = 0; errno = 0; - __llvm_libc::internal::simple_decimal_conversion( - numStart, &actual_output_mantissa, &actual_output_exp2); + auto result = __llvm_libc::internal::simple_decimal_conversion(numStart); + + actual_output_mantissa = result.num.mantissa; + actual_output_exp2 = result.num.exponent; + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); EXPECT_EQ(actual_output_exp2, expectedOutputExp2); - EXPECT_EQ(errno, expectedErrno); + EXPECT_EQ(result.error, expectedErrno); } }; @@ -172,20 +181,17 @@ eisel_lemire_test(2794967654709307188u, 1, 0x183e132bc608c9, 1087); } +// Check the fallback states for the algorithm: TEST_F(LlvmLibcStrToFloatTest, EiselLemireFallbackStates) { - // Check the fallback states for the algorithm: - uint32_t float_output_mantissa = 0; - uint64_t double_output_mantissa = 0; - uint32_t output_exp2 = 0; - // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away // from both of its closest floating point approximations // (12345678901234548736 and 12345678901234550784) - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 12345678901234549760u, 0, &double_output_mantissa, &output_exp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({12345678901234549760u, 0}) + .has_value()); - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 20040229, 0, &float_output_mantissa, &output_exp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({20040229, 0}).has_value()); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicWholeNumbers) { @@ -245,21 +251,27 @@ uint32_t output_exp2 = 0; errno = 0; - __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &float_output_mantissa, &output_exp2); + auto float_result = __llvm_libc::internal::simple_decimal_conversion( + "123456789012345678900"); + float_output_mantissa = float_result.num.mantissa; + output_exp2 = float_result.num.exponent; EXPECT_EQ(float_output_mantissa, uint32_t(0xd629d4)); EXPECT_EQ(output_exp2, uint32_t(193)); - EXPECT_EQ(errno, 0); + EXPECT_EQ(float_result.error, 0); uint64_t double_output_mantissa = 0; output_exp2 = 0; errno = 0; - __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &double_output_mantissa, &output_exp2); + auto double_result = __llvm_libc::internal::simple_decimal_conversion( + "123456789012345678900"); + + double_output_mantissa = double_result.num.mantissa; + output_exp2 = double_result.num.exponent; + EXPECT_EQ(double_output_mantissa, uint64_t(0x1AC53A7E04BCDA)); EXPECT_EQ(output_exp2, uint32_t(1089)); - EXPECT_EQ(errno, 0); + EXPECT_EQ(double_result.error, 0); } #if defined(LONG_DOUBLE_IS_DOUBLE) @@ -299,20 +311,18 @@ } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Fallback) { - uint32_t outputExp2 = 0; - UInt128 quadOutputMantissa = 0; - // This number is halfway between two possible results, and the algorithm // can't determine which is correct. ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 12345678901234567890u, 1, &quadOutputMantissa, &outputExp2)); + {12345678901234567890u, 1}) + .has_value()); // These numbers' exponents are out of range for the current powers of ten // table. - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 1, 1000, &quadOutputMantissa, &outputExp2)); - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 1, -1000, &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({1, 1000}).has_value()); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire({1, -1000}).has_value()); } #else // Quad precision long double TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Simple) { @@ -336,11 +346,10 @@ } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Fallback) { - uint32_t outputExp2 = 0; - UInt128 quadOutputMantissa = 0; - - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - (UInt128(0x5ce0e9a56015fec5) << 64) + UInt128(0xaadfa328ae39b333), 1, - &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE( + __llvm_libc::internal::eisel_lemire( + {(UInt128(0x5ce0e9a56015fec5) << 64) + UInt128(0xaadfa328ae39b333), + 1}, ) + .has_value()); } #endif diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -302,9 +302,12 @@ ":__support_builtin_wrappers", ":__support_common", ":__support_cpp_limits", + ":__support_cpp_optional", ":__support_ctype_utils", + ":__support_fputil_fenv_impl", ":__support_fputil_fp_bits", ":__support_str_to_integer", + ":__support_str_to_num_result", ":__support_uint128", ], )