diff --git a/libcxx/include/__config b/libcxx/include/__config --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -110,6 +110,8 @@ # define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI // Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr # define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI +// Enable breaking changes to stage 2 of string to float conversion in locale +# define _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP #elif _LIBCPP_ABI_VERSION == 1 # if !defined(_LIBCPP_OBJECT_FORMAT_COFF) // Enable compiling copies of now inline methods into the dylib to support diff --git a/libcxx/include/locale b/libcxx/include/locale --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -386,11 +386,21 @@ static string __stage2_float_prep(ios_base& __iob, _CharT* __atoms, _CharT& __decimal_point, _CharT& __thousands_sep); +#ifndef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP static int __stage2_float_loop(_CharT __ct, bool& __in_units, char& __exp, char* __a, char*& __a_end, _CharT __decimal_point, _CharT __thousands_sep, const string& __grouping, unsigned* __g, unsigned*& __g_end, unsigned& __dc, _CharT* __atoms); +#else // _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + static int __stage2_float_loop(_CharT __ct, bool& __in_units, char& __exp, + char* __a, char*& __a_end, + _CharT __decimal_point, _CharT __thousands_sep, + const string& __grouping, unsigned* __g, + unsigned*& __g_end, unsigned& __dc, _CharT* __atoms, + bool& __hex); +#endif // _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + #ifndef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET static string __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep); static int __stage2_int_loop(_CharT __ct, int __base, char* __a, char*& __a_end, @@ -513,6 +523,7 @@ return 0; } +#ifndef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP template int __num_get<_CharT>::__stage2_float_loop(_CharT __ct, bool& __in_units, char& __exp, char* __a, char*& __a_end, @@ -569,8 +580,141 @@ if (__f >= 22) return 0; ++__dc; + return 0; } +#else // _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP +template +_LIBCPP_HIDDEN +int +__num_get<_CharT>::__stage2_float_loop(_CharT __ct, bool& __in_units, char& __exp, char* __a, char*& __a_end, + _CharT __decimal_point, _CharT __thousands_sep, const string& __grouping, + unsigned* __g, unsigned*& __g_end, unsigned& __dc, _CharT* __atoms, bool& __hex) +{ +#define __UPPERCASE(__x) ((__x)&0x5F) + + if (__ct == __decimal_point) + { + if (!__in_units) + return -1; + __in_units = false; + *__a_end++ = '.'; + if (__grouping.size() != 0 && __g_end-__g < __num_get_buf_sz) + *__g_end++ = __dc; + return 0; + } + if (__ct == __thousands_sep && __grouping.size() != 0) + { + if (!__in_units) + return -1; + if (__g_end-__g < __num_get_buf_sz) + { + *__g_end++ = __dc; + __dc = 0; + } + return 0; + } + ptrdiff_t __f = find(__atoms, __atoms + 32, __ct) - __atoms; + const bool __is_digit = __hex ? __f < 22 : __f < 10; + const bool __first = __a_end == __a; + if (__f >= 32) + return -1; + char __x = __src[__f]; + char __X = __UPPERCASE(__x); + + // Return early -1 for any character that is not valid at this point + if (__x == '-' || __x == '+') + { + // Previous character must be __exp, which was marked as seen setting bit 0x80 + if (!__first && __UPPERCASE(__a_end[-1]) != (__exp & 0x7F)) + return -1; + } + else if (__x == 'x' || __x == 'X') + { + // Can't have 'x' or 'X' as the first character + if (__first) + return -1; + // Must be preceeded by a '0' + if (__a_end[-1] != __atoms[0]) + return -1; + // Can't have multiple occurrences of 'x' + if (__hex) + return -1; + __hex = true; + __exp = 'P'; + } + else if (__X == __exp) + { + // Can't have e/E/p/P as first character + if (__first) + return -1; + // Mark exponent as seen + __exp |= (char) 0x80; + if (__in_units) + { + __in_units = false; + if (__grouping.size() != 0 && __g_end-__g < __num_get_buf_sz) + *__g_end++ = __dc; + } + } + else if (!__is_digit) + { + // Not '.' or __thousands_sep or '+' or '-' or 'x' or __exp or digit. + // Special handling for the characters in INF/NAN. + // These must appear at the start of the sequence, possibly preceeded by + or -. + // Look back one character to check that these are part of a valid sequence. + + if (__first) + { + // + and - as first character are handled in a separate branch. + if (__X != 'I' && __X != 'N') + return -1; + } + else + { + char __prev = + __src[find(__atoms, __atoms + 32, __a_end[-1]) - __atoms]; + char __PREV = __UPPERCASE(__prev); + + // Rule out special characters out of sequence INF or NAN. + if (__X == 'I') + { + if (__prev != '+' && __prev != '-' && __PREV != 'N') + return -1; + } + else if (__X == 'N') + { + if (__prev != '+' && __prev != '-' && __PREV != 'I' && + __PREV != 'A') + return -1; + } + else if (__X == 'F') + { + if (__PREV != 'N') + return -1; + } + else if (__X == 'A') + { + if (__PREV != 'N') + return -1; + } + else if (!__is_digit) + { + return -1; + } + } + } + + // "...c is allowed as the next character of an input field of the conversion specifier returned by Stage 1." + *__a_end++ = __x; + + if (__is_digit) + ++__dc; + + return 0; +#undef __UPPERCASE +} +#endif // _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP _LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get) _LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(struct _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __num_get) @@ -1035,6 +1179,10 @@ unsigned __dc = 0; bool __in_units = true; char __exp = 'E'; +#ifdef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + bool __hex = false; //< set to true when we see 0x +#endif + for (; __b != __e; ++__b) { if (__a_end == __a + __buf.size()) @@ -1045,10 +1193,17 @@ __a = &__buf[0]; __a_end = __a + __tmp; } +#ifndef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP if (this->__stage2_float_loop(*__b, __in_units, __exp, __a, __a_end, __decimal_point, __thousands_sep, __grouping, __g, __g_end, __dc, __atoms)) +#else // _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + if (this->__stage2_float_loop(*__b, __in_units, __exp, __a, __a_end, + __decimal_point, __thousands_sep, + __grouping, __g, __g_end, + __dc, __atoms, __hex)) +#endif // _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP break; } if (__grouping.size() != 0 && __in_units && __g_end-__g < __num_get_base::__num_get_buf_sz) diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp @@ -21,6 +21,7 @@ #include "test_macros.h" #include "test_iterators.h" #include "hexfloat.h" +#include "get_float_common.h" typedef std::num_get > F; @@ -49,6 +50,41 @@ const my_facet f(1); std::ios ios(0); double v = -1; + +#ifdef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + // Valid floating point formats where whole string is consumed + TEST("0x123.4f", 8, (double)hexfloat(0x123, 0x4f, 0), ios.eofbit); + TEST("inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("Inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("+iNf", 4, INFINITY, ios.goodbit | ios.eofbit); + TEST("-inF", 4, -INFINITY, ios.goodbit | ios.eofbit); + TEST("INFxyz", 3, INFINITY, ios.goodbit); + + // Valid floating point formats with unparsed trailing characters + TEST("123.4f", 5, 123.4, ios.goodbit); + TEST("123xyz", 3, 123.0, ios.goodbit); + TEST("0x123.4+", 7, (double)hexfloat(0x123, 0x4, 0), ios.goodbit); + + // Shouldn't recognise e, p or x more than once + TEST("123.4e-5e-4", 8, 123.4e-5, ios.goodbit); + TEST("0x123.4p-5p-4", 10, (double)hexfloat(0x123, 0x4, -5), ios.goodbit); + TEST("0x123x5", 5, (double)hexfloat(0x123, 0x0, 0), ios.goodbit); + + // Invalid (non-float) inputs + TEST("a", 0, 0.0, ios.failbit); + TEST("e", 0, 0.0, ios.failbit); + TEST("f", 0, 0.0, ios.failbit); + TEST("p", 0, 0.0, ios.failbit); + TEST("M", 0, 0.0, ios.failbit); + TEST("{}", 0, 0.0, ios.failbit); + TEST("x123", 0, 0.0, ios.failbit); + + // Incomplete inputs, i.e. eof before finished parsing + TEST("-", 1, 0.0, ios.eofbit | ios.failbit); + TEST("+", 1, 0.0, ios.eofbit | ios.failbit); + TEST("0x123.4p", 8, 0.0, ios.eofbit | ios.failbit); +#endif + { const char str[] = "123"; assert((ios.flags() & ios.basefield) == ios.dec); diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp @@ -21,6 +21,7 @@ #include "test_macros.h" #include "test_iterators.h" #include "hexfloat.h" +#include "get_float_common.h" typedef std::num_get > F; @@ -38,6 +39,41 @@ const my_facet f(1); std::ios ios(0); float v = -1; + +#ifdef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + // Valid floating point formats where whole string is consumed + TEST("0x123.4f", 8, (float)hexfloat(0x123, 0x4f, 0), ios.eofbit); + TEST("inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("Inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("+iNf", 4, INFINITY, ios.goodbit | ios.eofbit); + TEST("-inF", 4, -INFINITY, ios.goodbit | ios.eofbit); + TEST("INFxyz", 3, INFINITY, ios.goodbit); + + // Valid floating point formats with unparsed trailing characters + TEST("123.4f", 5, 123.4f, ios.goodbit); + TEST("123xyz", 3, 123.0f, ios.goodbit); + TEST("0x123.4+", 7, (float)hexfloat(0x123, 0x4, 0), ios.goodbit); + + // Shouldn't recognise e, p or x more than once + TEST("123.4e-5e-4", 8, 123.4e-5f, ios.goodbit); + TEST("0x123.4p-5p-4", 10, (float)hexfloat(0x123, 0x4, -5), ios.goodbit); + TEST("0x123x5", 5, (float)hexfloat(0x123, 0x0, 0), ios.goodbit); + + // Invalid (non-float) inputs + TEST("a", 0, 0.0f, ios.failbit); + TEST("e", 0, 0.0f, ios.failbit); + TEST("f", 0, 0.0f, ios.failbit); + TEST("p", 0, 0.0f, ios.failbit); + TEST("M", 0, 0.0f, ios.failbit); + TEST("{}", 0, 0.0f, ios.failbit); + TEST("x123", 0, 0.0f, ios.failbit); + + // Incomplete inputs, i.e. eof before finished parsing + TEST("-", 1, 0.0f, ios.eofbit | ios.failbit); + TEST("+", 1, 0.0f, ios.eofbit | ios.failbit); + TEST("0x123.4p", 8, 0.0f, ios.eofbit | ios.failbit); +#endif + { const char str[] = "123"; assert((ios.flags() & ios.basefield) == ios.dec); diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float_common.h b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float_common.h new file mode 100644 --- /dev/null +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float_common.h @@ -0,0 +1,21 @@ +#ifndef GET_FLOAT_COMMON_H +#define GET_FLOAT_COMMON_H + +/// Read a double from the input string, check that the expected number of +/// characters are read, the expected value is returned, and the expected +/// error is set. +#define TEST(STR, EXPECTED_LEN, EXPECTED_VAL, EXPECTED_ERR) \ + { \ + std::ios_base::iostate err = ios.goodbit; \ + cpp17_input_iterator iter = \ + f.get(cpp17_input_iterator((STR)), cpp17_input_iterator((STR) + strlen((STR))), ios, \ + err, v); \ + assert(iter.base() == (STR) + (EXPECTED_LEN) && "read wrong number of characters"); \ + assert(err == (EXPECTED_ERR)); \ + if (std::isnan(EXPECTED_VAL)) \ + assert(std::isnan(v) && "expected NaN value"); \ + else \ + assert(v == (EXPECTED_VAL) && "wrong value"); \ + } + +#endif diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp @@ -21,6 +21,7 @@ #include "test_macros.h" #include "test_iterators.h" #include "hexfloat.h" +#include "get_float_common.h" typedef std::num_get > F; @@ -38,6 +39,41 @@ const my_facet f(1); std::ios ios(0); long double v = -1; + +#ifdef _LIBCPP_ABI_LOCAL_NUM_GET_NEW_STAGE2_FLOAT_LOOP + // Valid floating point formats where whole string is consumed + TEST("0x123.4f", 8, (long double)hexfloat(0x123, 0x4f, 0), ios.eofbit); + TEST("inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("Inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("+iNf", 4, INFINITY, ios.goodbit | ios.eofbit); + TEST("-inF", 4, -INFINITY, ios.goodbit | ios.eofbit); + TEST("INFxyz", 3, INFINITY, ios.goodbit); + + // Valid floating point formats with unparsed trailing characters + TEST("123.4f", 5, 123.4l, ios.goodbit); + TEST("123xyz", 3, 123.0l, ios.goodbit); + TEST("0x123.4+", 7, (long double)hexfloat(0x123, 0x4, 0), ios.goodbit); + + // Shouldn't recognise e, p or x more than once + TEST("123.4e-5e-4", 8, 123.4e-5l, ios.goodbit); + TEST("0x123.4p-5p-4", 10, (long double)hexfloat(0x123, 0x4, -5), ios.goodbit); + TEST("0x123x5", 5, (long double)hexfloat(0x123, 0x0, 0), ios.goodbit); + + // Invalid (non-float) inputs + TEST("a", 0, 0.0l, ios.failbit); + TEST("e", 0, 0.0l, ios.failbit); + TEST("f", 0, 0.0l, ios.failbit); + TEST("p", 0, 0.0l, ios.failbit); + TEST("M", 0, 0.0l, ios.failbit); + TEST("{}", 0, 0.0l, ios.failbit); + TEST("x123", 0, 0.0l, ios.failbit); + + // Incomplete inputs, i.e. eof before finished parsing + TEST("-", 1, 0.0l, ios.eofbit | ios.failbit); + TEST("+", 1, 0.0l, ios.eofbit | ios.failbit); + TEST("0x123.4p", 8, 0.0l, ios.eofbit | ios.failbit); +#endif + { const char str[] = "123"; assert((ios.flags() & ios.basefield) == ios.dec);