diff --git a/libcxx/include/locale b/libcxx/include/locale --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -367,7 +367,8 @@ static const int __num_get_buf_sz = 40; static int __get_base(ios_base&); - static const char __src[33]; + static const int __n_atoms_float = 36; // float has the largest character set + static const char __src[__n_atoms_float + 1]; // includes null terminator }; _LIBCPP_FUNC_VIS @@ -385,7 +386,8 @@ char* __a, char*& __a_end, _CharT __decimal_point, _CharT __thousands_sep, const string& __grouping, unsigned* __g, - unsigned*& __g_end, unsigned& __dc, _CharT* __atoms); + unsigned*& __g_end, unsigned& __dc, _CharT* __atoms, + bool& __hex); #ifndef _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET static string __stage2_int_prep(ios_base& __iob, _CharT* __atoms, _CharT& __thousands_sep); static int __stage2_int_loop(_CharT __ct, int __base, char* __a, char*& __a_end, @@ -447,7 +449,7 @@ _CharT& __thousands_sep) { locale __loc = __iob.getloc(); - use_facet >(__loc).widen(__src, __src + 32, __atoms); + use_facet >(__loc).widen(__src, __src + __n_atoms_float, __atoms); const numpunct<_CharT>& __np = use_facet >(__loc); __decimal_point = __np.decimal_point(); __thousands_sep = __np.thousands_sep(); @@ -512,8 +514,10 @@ int __num_get<_CharT>::__stage2_float_loop(_CharT __ct, bool& __in_units, char& __exp, char* __a, char*& __a_end, _CharT __decimal_point, _CharT __thousands_sep, const string& __grouping, - unsigned* __g, unsigned*& __g_end, unsigned& __dc, _CharT* __atoms) + unsigned* __g, unsigned*& __g_end, unsigned& __dc, _CharT* __atoms, bool& __hex) { + auto uppercase = [](char x){ return x & 0x5F; }; // Clears bits 0x20 and 0x80, so ASCII 'a'-'z' map to 'A'-'Z'. NOTE(review): uses auto + a lambda and the non-reserved names `uppercase`/`x`, unlike the `__`-prefixed names elsewhere in this header - confirm this is acceptable for every language mode this header must support. + if (__ct == __decimal_point) { if (!__in_units) @@ -535,23 +539,42 @@ } return 0; } - ptrdiff_t __f = find(__atoms, __atoms + 32, __ct) - __atoms; - if (__f >= 32) - return -1; + + ptrdiff_t __f = find(__atoms, __atoms + __n_atoms_float, __ct) - __atoms; + const bool __is_digit = __hex ? 
__f < 22 : __f < 10; + const bool __first = __a_end == __a; + if(__f >= __n_atoms_float) + return -1; char __x = __src[__f]; + char __X = uppercase(__x); + + // Return -1 early for any character that is not valid at this point if (__x == '-' || __x == '+') { - if (__a_end == __a || (__a_end[-1] & 0x5F) == (__exp & 0x7F)) - { - *__a_end++ = __x; - return 0; - } - return -1; + // Unless this is the first character, the previous character must be __exp (bit 0x80, which marks __exp as seen, is masked off for the comparison) + if (!__first && uppercase(__a_end[-1]) != (__exp & 0x7F)) + return -1; } - if (__x == 'x' || __x == 'X') + else if (__x == 'x' || __x == 'X') + { + // Can't have 'x' or 'X' as the first character + if(__first) + return -1; + // Must be preceded by a '0'. NOTE(review): __a_end[-1] is a narrow char taken from __src, while __atoms holds the widened _CharT atoms - confirm that comparing against __atoms[0] rather than __src[0] is intended. + if(__a_end[-1] != __atoms[0]) + return -1; + // Can't have multiple occurrences of 'x' + if(__hex) + return -1; + __hex = true; __exp = 'P'; - else if ((__x & 0x5F) == __exp) + } + else if (__X == __exp) { + // Can't have e/E/p/P as first character + if (__first) + return -1; + // Mark exponent as seen __exp |= (char) 0x80; if (__in_units) { @@ -560,10 +583,65 @@ *__g_end++ = __dc; } } + else if (!__is_digit) { + // Not '.' or __thousands_sep or '+' or '-' or 'x' or __exp or digit. + // Special handling for the characters in INF/INFINITY/NAN. + // These must appear at the start of the sequence, possibly preceded by + or -. + // Look back one character to check that these are part of a valid sequence. + // FIXME currently can't handle NANANANAN. + + if (__first) { + // + and - as first character are handled in a separate branch. + if (__X != 'I' && __X != 'N') + return -1; + } else { + char __prev = __src[find(__atoms, __atoms + __n_atoms_float, __a_end[-1]) - __atoms]; + char __PREV = uppercase(__prev); + + // Reject INF/INFINITY/NAN characters that do not follow a valid predecessor. NOTE(review): __prev is obtained by searching the widened __atoms for the narrow char __a_end[-1]; when nothing matches, the index is __n_atoms_float and __src yields its null terminator - confirm this lookup is intended. 
+ if (__X == 'I') + { + if (__prev != '+' && __prev != '-' && __PREV != 'F' && __PREV != 'N') + return -1; + } + else if (__X == 'N') + { + if (__prev != '+' && __prev != '-' && __PREV != 'I' && __PREV != 'A') + return -1; + } + else if (__X == 'F') + { + if (__PREV != 'N') + return -1; + } + else if (__X == 'T') + { + if (__PREV != 'I') + return -1; + } + else if (__X == 'Y') + { + if (__PREV != 'T') + return -1; + } + else if (__X == 'A') + { + if (__PREV != 'N') + return -1; + } + else if(!__is_digit) + { + return -1; + } + } + } + + // "...c is allowed as the next character of an input field of the conversion specifier returned by Stage 1." *__a_end++ = __x; - if (__f >= 22) - return 0; - ++__dc; + + if (__is_digit) + ++__dc; + return 0; } @@ -1015,7 +1093,7 @@ { // Stage 1, nothing to do // Stage 2 - char_type __atoms[32]; + char_type __atoms[this->__n_atoms_float]; char_type __decimal_point; char_type __thousands_sep; string __grouping = this->__stage2_float_prep(__iob, __atoms, @@ -1030,6 +1108,8 @@ unsigned __dc = 0; bool __in_units = true; char __exp = 'E'; + bool __hex = false; //< set to true by __stage2_float_loop once an 'x'/'X' following a '0' has been accepted + for (; __b != __e; ++__b) { if (__a_end == __a + __buf.size()) @@ -1043,7 +1123,7 @@ if (this->__stage2_float_loop(*__b, __in_units, __exp, __a, __a_end, __decimal_point, __thousands_sep, __grouping, __g, __g_end, - __dc, __atoms)) + __dc, __atoms, __hex)) break; } if (__grouping.size() != 0 && __in_units && __g_end-__g < __num_get_base::__num_get_buf_sz) diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -4559,7 +4559,8 @@ return 10; } -const char __num_get_base::__src[33] = "0123456789abcdefABCDEFxX+-pPiInN"; +const char __num_get_base::__src[__num_get_base::__n_atoms_float + 1] = + "0123456789abcdefABCDEFxX+-pPiInNtTyY"; void __check_grouping(const string& __grouping, unsigned* __g, unsigned* __g_end, diff --git 
a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp @@ -21,6 +21,7 @@ #include "test_macros.h" #include "test_iterators.h" #include "hexfloat.h" +#include "get_float_common.h" typedef std::num_get > F; @@ -49,6 +50,38 @@ const my_facet f(1); std::ios ios(0); double v = -1; + + // Valid floating point formats where the whole string is consumed + TEST("0x123.4f", 8, hexfloat(0x123, 0x4f, 0), ios.eofbit); + TEST("inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("INFINITY", 8, INFINITY, ios.eofbit | ios.goodbit); + + // Valid floating point formats with unparsed trailing characters + TEST("123.4f", 5, 123.4, ios.goodbit); + TEST("123xyz", 3, 123.0, ios.goodbit); + TEST("0x123.4+", 7, hexfloat(0x123, 0x4, 0), ios.goodbit); + // TEST("NININININ", 3, NAN, ios.goodbit); + // TEST("NANANANAN", 3, NAN, ios.goodbit); + + // Shouldn't recognise e, p or x more than once + TEST("123.4e-5e-4", 8, 123.4e-5, ios.goodbit); + TEST("0x123.4p-5p-4", 10, hexfloat(0x123, 0x4, -5), ios.goodbit); + TEST("0x123x5", 5, hexfloat(0x123, 0x0, 0), ios.goodbit); + + // Invalid (non-float) inputs + TEST("a", 0, 0.0, ios.failbit); + TEST("e", 0, 0.0, ios.failbit); + TEST("f", 0, 0.0, ios.failbit); + TEST("p", 0, 0.0, ios.failbit); + TEST("M", 0, 0.0, ios.failbit); + TEST("{}", 0, 0.0, ios.failbit); + TEST("x123", 0, 0.0, ios.failbit); + + // Incomplete inputs, i.e. 
eof before finished parsing + TEST("-", 1, 0.0, ios.eofbit | ios.failbit); + TEST("+", 1, 0.0, ios.eofbit | ios.failbit); + TEST("0x123.4p", 8, 0.0, ios.eofbit | ios.failbit); + { const char str[] = "123"; assert((ios.flags() & ios.basefield) == ios.dec); diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp @@ -21,6 +21,7 @@ #include "test_macros.h" #include "test_iterators.h" #include "hexfloat.h" +#include "get_float_common.h" typedef std::num_get > F; @@ -38,6 +39,38 @@ const my_facet f(1); std::ios ios(0); float v = -1; + + // Valid floating point formats where the whole string is consumed + TEST("0x123.4f", 8, hexfloat(0x123, 0x4f, 0), ios.eofbit); + TEST("inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("INFINITY", 8, INFINITY, ios.eofbit | ios.goodbit); + + // Valid floating point formats with unparsed trailing characters + TEST("123.4f", 5, 123.4f, ios.goodbit); + TEST("123xyz", 3, 123.0f, ios.goodbit); + TEST("0x123.4+", 7, hexfloat(0x123, 0x4, 0), ios.goodbit); + // TEST("NININININ", 3, NAN, ios.goodbit); + // TEST("NANANANAN", 3, NAN, ios.goodbit); + + // Shouldn't recognise e, p or x more than once + TEST("123.4e-5e-4", 8, 123.4e-5f, ios.goodbit); + TEST("0x123.4p-5p-4", 10, hexfloat(0x123, 0x4, -5), ios.goodbit); + TEST("0x123x5", 5, hexfloat(0x123, 0x0, 0), ios.goodbit); + + // Invalid (non-float) inputs + TEST("a", 0, 0.0f, ios.failbit); + TEST("e", 0, 0.0f, ios.failbit); + TEST("f", 0, 0.0f, ios.failbit); + TEST("p", 0, 0.0f, ios.failbit); + TEST("M", 0, 0.0f, ios.failbit); + TEST("{}", 0, 0.0f, 
ios.failbit); + TEST("x123", 0, 0.0f, ios.failbit); + + // Incomplete inputs, i.e. EOF is reached before parsing has finished + TEST("-", 1, 0.0f, ios.eofbit | ios.failbit); + TEST("+", 1, 0.0f, ios.eofbit | ios.failbit); + TEST("0x123.4p", 8, 0.0f, ios.eofbit | ios.failbit); + { const char str[] = "123"; assert((ios.flags() & ios.basefield) == ios.dec); diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float_common.h b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float_common.h new file mode 100644 --- /dev/null +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float_common.h @@ -0,0 +1,22 @@ +#ifndef GET_FLOAT_COMMON_H +#define GET_FLOAT_COMMON_H + +/// Read a floating-point value from the input string into `v`, check that the expected number of +/// characters are read, the expected value is returned, and the expected +/// error is set. Expects the names `f`, `ios` and `v` to be in scope where the macro is used. 
+#define TEST(STR, EXPECTED_LEN, EXPECTED_VAL, EXPECTED_ERR) \ + { \ + std::ios_base::iostate err = ios.goodbit; \ + input_iterator iter = f.get( \ + input_iterator((STR)), \ + input_iterator((STR) + strlen((STR))), ios, err, v); \ + assert(iter.base() == (STR) + (EXPECTED_LEN) && \ + "read wrong number of characters"); \ + assert(err == (EXPECTED_ERR)); \ + if (std::isnan(EXPECTED_VAL)) \ + assert(std::isnan(v) && "expected NaN value"); \ + else \ + assert(v == (EXPECTED_VAL) && "wrong value"); \ + } + +#endif diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp @@ -21,6 +21,7 @@ #include "test_macros.h" #include "test_iterators.h" #include "hexfloat.h" +#include "get_float_common.h" typedef std::num_get > F; @@ -38,6 +39,39 @@ const my_facet f(1); std::ios ios(0); long double v = -1; + + // Valid floating point formats where the whole string is consumed + TEST("0x123.4f", 8, hexfloat(0x123, 0x4f, 0), ios.eofbit); + TEST("inf", 3, INFINITY, ios.goodbit | ios.eofbit); + TEST("INFINITY", 8, INFINITY, ios.eofbit | ios.goodbit); + + // Valid floating point formats with unparsed trailing characters + TEST("123.4f", 5, 123.4l, ios.goodbit); + TEST("123xyz", 3, 123.0l, ios.goodbit); + TEST("0x123.4+", 7, hexfloat(0x123, 0x4, 0), ios.goodbit); + // TEST("NININININ", 3, NAN, ios.goodbit); + // TEST("NANANANAN", 3, NAN, ios.goodbit); + + // Shouldn't recognise e, p or x more than once + TEST("123.4e-5e-4", 8, 123.4e-5l, ios.goodbit); + TEST("0x123.4p-5p-4", 10, hexfloat(0x123, 0x4, -5), + ios.goodbit); + 
TEST("0x123x5", 5, hexfloat(0x123, 0x0, 0), ios.goodbit); + + // Invalid (non-float) inputs + TEST("a", 0, 0.0l, ios.failbit); + TEST("e", 0, 0.0l, ios.failbit); + TEST("f", 0, 0.0l, ios.failbit); + TEST("p", 0, 0.0l, ios.failbit); + TEST("M", 0, 0.0l, ios.failbit); + TEST("{}", 0, 0.0l, ios.failbit); + TEST("x123", 0, 0.0l, ios.failbit); + + // Incomplete inputs, i.e. EOF is reached before parsing has finished + TEST("-", 1, 0.0l, ios.eofbit | ios.failbit); + TEST("+", 1, 0.0l, ios.eofbit | ios.failbit); + TEST("0x123.4p", 8, 0.0l, ios.eofbit | ios.failbit); + { const char str[] = "123"; assert((ios.flags() & ios.basefield) == ios.dec);