diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -316,7 +316,7 @@ function(add_libc_fuzzer target_name) cmake_parse_arguments( "LIBC_FUZZER" - "" # No optional arguments + "NEED_MPFR" # Optional arguments "" # Single value arguments "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi-value arguments ${ARGN} @@ -330,6 +330,16 @@ "'add_entrypoint_object' targets.") endif() + list(APPEND LIBC_FUZZER_LINK_LIBRARIES "") + if(LIBC_FUZZER_NEED_MPFR) + if(NOT LIBC_TESTS_CAN_USE_MPFR) + message(VERBOSE "Fuzz test ${name} will be skipped as MPFR library is not available.") + return() + endif() + list(APPEND LIBC_FUZZER_LINK_LIBRARIES mpfr gmp) + endif() + + get_fq_target_name(${target_name} fq_target_name) get_fq_deps_list(fq_deps_list ${LIBC_FUZZER_DEPENDS}) get_object_files_for_test( @@ -365,7 +375,10 @@ ${LIBC_BUILD_DIR}/include ) - target_link_libraries(${fq_target_name} PRIVATE ${link_object_files}) + target_link_libraries(${fq_target_name} PRIVATE + ${link_object_files} + ${LIBC_FUZZER_LINK_LIBRARIES} + ) set_target_properties(${fq_target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/libc/fuzzing/stdlib/CMakeLists.txt b/libc/fuzzing/stdlib/CMakeLists.txt --- a/libc/fuzzing/stdlib/CMakeLists.txt +++ b/libc/fuzzing/stdlib/CMakeLists.txt @@ -14,12 +14,11 @@ StringParserOutputDiff.h DEPENDS libc.src.stdlib.atof - COMPILE_OPTIONS - -DLLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR ) add_libc_fuzzer( strtofloat_fuzz + NEED_MPFR SRCS strtofloat_fuzz.cpp DEPENDS diff --git a/libc/fuzzing/stdlib/atof_differential_fuzz.cpp b/libc/fuzzing/stdlib/atof_differential_fuzz.cpp --- a/libc/fuzzing/stdlib/atof_differential_fuzz.cpp +++ b/libc/fuzzing/stdlib/atof_differential_fuzz.cpp @@ -16,40 +16,6 @@ #include "fuzzing/stdlib/StringParserOutputDiff.h" -// TODO: Remove this once glibc fixes hex subnormal rounding. See -// https://sourceware.org/bugzilla/show_bug.cgi?id=30220 -#ifdef LLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR -#include -constexpr double MIN_NORMAL = 0x1p-1022; - -bool has_hex_prefix(const uint8_t *str) { - size_t index = 0; - - // Skip over leading whitespace - while (isspace(str[index])) { - ++index; - } - // Skip over sign - if (str[index] == '-' || str[index] == '+') { - ++index; - } - return str[index] == '0' && (tolower(str[index + 1])) == 'x'; -} - -bool should_be_skipped(const uint8_t *str) { - double init_result = ::atof(reinterpret_cast(str)); - if (init_result < 0) { - init_result = -init_result; - } - if (init_result < MIN_NORMAL && init_result != 0) { - return has_hex_prefix(str); - } - return false; -} -#else -bool should_be_skipped(const uint8_t *) { return false; } -#endif // LLVM_LIBC_ATOF_DIF_FUZZ_SKIP_GLIBC_HEX_SUBNORMAL_ERR - extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { uint8_t *container = new uint8_t[size + 1]; if (!container) @@ -60,10 +26,7 @@ container[i] = data[i]; container[size] = '\0'; // Add null terminator to container. - if (!should_be_skipped(container)) { - StringParserOutputDiff(&__llvm_libc::atof, &::atof, container, - size); - } + StringParserOutputDiff(&__llvm_libc::atof, &::atof, container, size); delete[] container; return 0; } diff --git a/libc/fuzzing/stdlib/strtofloat_fuzz.cpp b/libc/fuzzing/stdlib/strtofloat_fuzz.cpp --- a/libc/fuzzing/stdlib/strtofloat_fuzz.cpp +++ b/libc/fuzzing/stdlib/strtofloat_fuzz.cpp @@ -13,50 +13,87 @@ #include "src/stdlib/strtod.h" #include "src/stdlib/strtof.h" #include "src/stdlib/strtold.h" + #include #include #include +#include + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { uint8_t *container = new uint8_t[size + 1]; if (!container) __builtin_trap(); size_t i; - for (i = 0; i < size; ++i) - container[i] = data[i]; + for (i = 0; i < size; ++i) { + // MPFR's strtofr uses "@" as a base-independent exponent symbol + if (data[i] != '@') + container[i] = data[i]; + else { + container[i] = '#'; + } + } container[size] = '\0'; // Add null terminator to container. const char *str_ptr = reinterpret_cast(container); char *out_ptr = nullptr; - // This fuzzer only checks that the algorithms didn't read beyond the end of - // the string in container. Combined with sanitizers, this will check that the - // code is not reading memory beyond what's expected. This test does not - // effectively check the correctness of the result. + mpfr_t result; + mpfr_init2(result, 256); + mpfr_t bin_result; + mpfr_init2(bin_result, 256); + mpfr_strtofr(result, str_ptr, &out_ptr, 0 /* base */, MPFR_RNDN); + ptrdiff_t result_strlen = out_ptr - str_ptr; + mpfr_strtofr(bin_result, str_ptr, &out_ptr, 2 /* base */, MPFR_RNDN); + ptrdiff_t bin_result_strlen = out_ptr - str_ptr; + + long double bin_result_ld = mpfr_get_ld(bin_result, MPFR_RNDN); + long double result_ld = mpfr_get_ld(result, MPFR_RNDN); + + // This detects if mpfr's strtofr selected a base of 2, which libc does not + // support. If a base 2 decoding is detected, it is replaced by a base 10 + // decoding. + if ((bin_result_ld != 0.0 || bin_result_strlen == result_strlen) && + bin_result_ld == result_ld) { + mpfr_strtofr(result, str_ptr, &out_ptr, 10 /* base */, MPFR_RNDN); + result_strlen = out_ptr - str_ptr; + } + auto volatile atof_output = __llvm_libc::atof(str_ptr); auto volatile strtof_output = __llvm_libc::strtof(str_ptr, &out_ptr); - if (str_ptr + size < out_ptr) + ptrdiff_t strtof_strlen = out_ptr - str_ptr; + if (result_strlen != strtof_strlen) __builtin_trap(); auto volatile strtod_output = __llvm_libc::strtod(str_ptr, &out_ptr); - if (str_ptr + size < out_ptr) + ptrdiff_t strtod_strlen = out_ptr - str_ptr; + if (result_strlen != strtod_strlen) __builtin_trap(); auto volatile strtold_output = __llvm_libc::strtold(str_ptr, &out_ptr); - if (str_ptr + size < out_ptr) + ptrdiff_t strtold_strlen = out_ptr - str_ptr; + if (result_strlen != strtold_strlen) __builtin_trap(); - // If any of the outputs are NaN - if (isnan(atof_output) || isnan(strtof_output) || isnan(strtod_output) || - isnan(strtold_output)) { - // Then all the outputs should be NaN. - // This is a trivial check meant to silence the "unused variable" warnings. - if (!isnan(atof_output) || !isnan(strtof_output) || !isnan(strtod_output) || - !isnan(strtold_output)) { + // If any result is NaN, all of them should be NaN. We can't use the usual + // comparisons because NaN != NaN. + if (isnan(result_ld)) { + if (!(isnan(atof_output) && isnan(strtof_output) && isnan(strtod_output) && + isnan(strtold_output))) + __builtin_trap(); + } else { + if (mpfr_get_d(result, MPFR_RNDN) != atof_output) + __builtin_trap(); + if (mpfr_get_flt(result, MPFR_RNDN) != strtof_output) + __builtin_trap(); + if (mpfr_get_d(result, MPFR_RNDN) != strtod_output) + __builtin_trap(); + if (mpfr_get_ld(result, MPFR_RNDN) != strtold_output) __builtin_trap(); - } } + mpfr_clear(result); + mpfr_clear(bin_result); delete[] container; return 0; } diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -265,10 +265,15 @@ UInt128 approx_upper = static_cast(high64(mantissa)) * static_cast(power_of_ten[1]); - UInt128 approx_middle = static_cast(high64(mantissa)) * - static_cast(power_of_ten[0]) + - static_cast(low64(mantissa)) * - static_cast(power_of_ten[1]); + UInt128 approx_middle_a = static_cast(high64(mantissa)) * + static_cast(power_of_ten[0]); + UInt128 approx_middle_b = static_cast(low64(mantissa)) * + static_cast(power_of_ten[1]); + + UInt128 approx_middle = approx_middle_a + approx_middle_b; + + // Handle overflow in the middle + approx_upper += (approx_middle < approx_middle_a) ? UInt128(1) << 64 : 0; UInt128 approx_lower = static_cast(low64(mantissa)) * static_cast(power_of_ten[0]); @@ -928,8 +933,11 @@ return output; if (tolower(src[index]) == EXPONENT_MARKER) { - if (src[index + 1] == '+' || src[index + 1] == '-' || - isdigit(src[index + 1])) { + bool has_sign = false; + if (src[index + 1] == '+' || src[index + 1] == '-') { + has_sign = true; + } + if (isdigit(src[index + 1 + static_cast(has_sign)])) { ++index; auto result = strtointeger(src + index, 10); if (result.has_error()) @@ -1036,8 +1044,11 @@ exponent *= 4; if (tolower(src[index]) == EXPONENT_MARKER) { - if (src[index + 1] == '+' || src[index + 1] == '-' || - isdigit(src[index + 1])) { + bool has_sign = false; + if (src[index + 1] == '+' || src[index + 1] == '-') { + has_sign = true; + } + if (isdigit(src[index + 1 + static_cast(has_sign)])) { ++index; auto result = strtointeger(src + index, 10); if (result.has_error()) diff --git a/libc/test/src/stdlib/strtod_test.cpp b/libc/test/src/stdlib/strtod_test.cpp --- a/libc/test/src/stdlib/strtod_test.cpp +++ b/libc/test/src/stdlib/strtod_test.cpp @@ -223,6 +223,11 @@ // Same as above but for hex. run_test("0x0164810157p2047", 17, uint64_t(0x7ff0000000000000), ERANGE); + // This test ensures that only the correct number of characters is accepted. + // An exponent symbol followed by a sign isn't a valid exponent. + run_test("2e+", 1, uint64_t(0x4000000000000000)); + run_test("0x2p+", 3, uint64_t(0x4000000000000000)); + // This bug was in the handling of very large exponents in the exponent // marker. Previously anything greater than 10,000 would be set to 10,000. // This caused incorrect behavior if there were more than 10,000 '0's in the diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp --- a/libc/test/src/stdlib/strtold_test.cpp +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -147,6 +147,16 @@ UInt128(0x8803000000000000))); } +TEST_F(LlvmLibcStrToLDTest, Float80SpecificFailures) { + run_test("7777777777777777777777777777777777777777777777777777777777777777777" + "777777777777777777777777777777777", + 100, + SELECT_CONST(uint64_t(0x54ac729b8fcaf734), + (UInt128(0x414ae394dc) << 40) + UInt128(0x7e57b9a0c2), + (UInt128(0x414ac729b8fcaf73) << 64) + + UInt128(0x4184a3d793224129))); +} + TEST_F(LlvmLibcStrToLDTest, MaxSizeNumbers) { run_test("1.1897314953572317650e4932", 26, SELECT_CONST(uint64_t(0x7FF0000000000000),