Index: src/locale.cpp =================================================================== --- src/locale.cpp +++ src/locale.cpp @@ -1785,9 +1785,13 @@ // 040000 - 0FFFFF D8C0 - DBBF, DC00 - DFFF F1 - F3, 80 - BF, 80 - BF, 80 - BF 786432 // 100000 - 10FFFF DBC0 - DBFF, DC00 - DFFF F4 - F4, 80 - 8F, 80 - BF, 80 - BF 65536 -static -codecvt_base::result -utf16_to_utf8(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt, +template +typename enable_if +< + is_same<_FrmType, uint16_t>::value || is_same<_FrmType, uint32_t>::value, + codecvt_base::result +>::type +utf16_to_utf8(const _FrmType* frm, const _FrmType* frm_end, const _FrmType*& frm_nxt, uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) { @@ -1803,83 +1807,6 @@ } for (; frm_nxt < frm_end; ++frm_nxt) { - uint16_t wc1 = *frm_nxt; - if (wc1 > Maxcode) - return codecvt_base::error; - if (wc1 < 0x0080) - { - if (to_end-to_nxt < 1) - return codecvt_base::partial; - *to_nxt++ = static_cast(wc1); - } - else if (wc1 < 0x0800) - { - if (to_end-to_nxt < 2) - return codecvt_base::partial; - *to_nxt++ = static_cast(0xC0 | (wc1 >> 6)); - *to_nxt++ = static_cast(0x80 | (wc1 & 0x03F)); - } - else if (wc1 < 0xD800) - { - if (to_end-to_nxt < 3) - return codecvt_base::partial; - *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); - *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); - } - else if (wc1 < 0xDC00) - { - if (frm_end-frm_nxt < 2) - return codecvt_base::partial; - uint16_t wc2 = frm_nxt[1]; - if ((wc2 & 0xFC00) != 0xDC00) - return codecvt_base::error; - if (to_end-to_nxt < 4) - return codecvt_base::partial; - if (((((wc1 & 0x03C0UL) >> 6) + 1) << 16) + - ((wc1 & 0x003FUL) << 10) + (wc2 & 0x03FF) > Maxcode) - return codecvt_base::error; - ++frm_nxt; - uint8_t z = ((wc1 & 0x03C0) >> 6) + 1; - *to_nxt++ = static_cast(0xF0 | (z >> 2)); - *to_nxt++ = static_cast(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2)); - *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc2 & 0x003F)); - } - else if (wc1 < 0xE000) - { - return codecvt_base::error; - } - else - { - if (to_end-to_nxt < 3) - return codecvt_base::partial; - *to_nxt++ = static_cast(0xE0 | (wc1 >> 12)); - *to_nxt++ = static_cast(0x80 | ((wc1 & 0x0FC0) >> 6)); - *to_nxt++ = static_cast(0x80 | (wc1 & 0x003F)); - } - } - return codecvt_base::ok; -} - -static -codecvt_base::result -utf16_to_utf8(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt, - uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt, - unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) -{ - frm_nxt = frm; - to_nxt = to; - if (mode & generate_header) - { - if (to_end-to_nxt < 3) - return codecvt_base::partial; - *to_nxt++ = static_cast(0xEF); - *to_nxt++ = static_cast(0xBB); - *to_nxt++ = static_cast(0xBF); - } - for (; frm_nxt < frm_end; ++frm_nxt) - { uint16_t wc1 = static_cast(*frm_nxt); if (wc1 > Maxcode) return codecvt_base::error; @@ -1939,10 +1866,14 @@ return codecvt_base::ok; } -static -codecvt_base::result +template +typename enable_if +< + is_same<_ToType, uint16_t>::value || is_same<_ToType, uint32_t>::value, + codecvt_base::result +>::type utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, - uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt, + _ToType* to, _ToType* to_end, _ToType*& to_nxt, unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) { frm_nxt = frm; @@ -1960,7 +1891,7 @@ return codecvt_base::error; if (c1 < 0x80) { - *to_nxt = static_cast(c1); + *to_nxt = static_cast<_ToType>(c1); ++frm_nxt; } else if (c1 < 0xC2) @@ -1977,7 +1908,7 @@ uint16_t t = static_cast(((c1 & 0x1F) << 6) | (c2 & 0x3F)); if (t > Maxcode) return codecvt_base::error; - *to_nxt = t; + *to_nxt = static_cast<_ToType>(t); frm_nxt += 2; } else if (c1 < 0xF0) @@ -2008,7 +1939,7 @@ | (c3 & 0x3F)); if (t > Maxcode) return codecvt_base::error; - *to_nxt = t; + *to_nxt = static_cast<_ToType>(t); frm_nxt += 3; } else if (c1 < 0xF5) @@ -2041,12 +1972,12 @@ ((c2 & 0x3FUL) << 12) + ((c3 & 0x3FUL) << 6) + (c4 & 0x3F)) > Maxcode) return codecvt_base::error; - *to_nxt = static_cast( + *to_nxt = static_cast<_ToType>( 0xD800 | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6) | ((c2 & 0x0F) << 2) | ((c3 & 0x30) >> 4)); - *++to_nxt = static_cast( + *++to_nxt = static_cast<_ToType>( 0xDC00 | ((c3 & 0x0F) << 6) | (c4 & 0x3F)); @@ -2061,127 +1992,6 @@ } static -codecvt_base::result -utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt, - uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt, - unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0)) -{ - frm_nxt = frm; - to_nxt = to; - if (mode & consume_header) - { - if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB && - frm_nxt[2] == 0xBF) - frm_nxt += 3; - } - for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt) - { - uint8_t c1 = *frm_nxt; - if (c1 > Maxcode) - return codecvt_base::error; - if (c1 < 0x80) - { - *to_nxt = static_cast(c1); - ++frm_nxt; - } - else if (c1 < 0xC2) - { - return codecvt_base::error; - } - else if (c1 < 0xE0) - { - if (frm_end-frm_nxt < 2) - return codecvt_base::partial; - uint8_t c2 = frm_nxt[1]; - if ((c2 & 0xC0) != 0x80) - return codecvt_base::error; - uint16_t t = static_cast(((c1 & 0x1F) << 6) | (c2 & 0x3F)); - if (t > Maxcode) - return codecvt_base::error; - *to_nxt = static_cast(t); - frm_nxt += 2; - } - else if (c1 < 0xF0) - { - if (frm_end-frm_nxt < 3) - return codecvt_base::partial; - uint8_t c2 = frm_nxt[1]; - uint8_t c3 = frm_nxt[2]; - switch (c1) - { - case 0xE0: - if ((c2 & 0xE0) != 0xA0) - return codecvt_base::error; - break; - case 0xED: - if ((c2 & 0xE0) != 0x80) - return codecvt_base::error; - break; - default: - if ((c2 & 0xC0) != 0x80) - return codecvt_base::error; - break; - } - if ((c3 & 0xC0) != 0x80) - return codecvt_base::error; - uint16_t t = static_cast(((c1 & 0x0F) << 12) - | ((c2 & 0x3F) << 6) - | (c3 & 0x3F)); - if (t > Maxcode) - return codecvt_base::error; - *to_nxt = static_cast(t); - frm_nxt += 3; - } - else if (c1 < 0xF5) - { - if (frm_end-frm_nxt < 4) - return codecvt_base::partial; - uint8_t c2 = frm_nxt[1]; - uint8_t c3 = frm_nxt[2]; - uint8_t c4 = frm_nxt[3]; - switch (c1) - { - case 0xF0: - if (!(0x90 <= c2 && c2 <= 0xBF)) - return codecvt_base::error; - break; - case 0xF4: - if ((c2 & 0xF0) != 0x80) - return codecvt_base::error; - break; - default: - if ((c2 & 0xC0) != 0x80) - return codecvt_base::error; - break; - } - if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80) - return codecvt_base::error; - if (to_end-to_nxt < 2) - return codecvt_base::partial; - if ((((c1 & 7UL) << 18) + - ((c2 & 0x3FUL) << 12) + - ((c3 & 0x3FUL) << 6) + (c4 & 0x3F)) > Maxcode) - return codecvt_base::error; - *to_nxt = static_cast( - 0xD800 - | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6) - | ((c2 & 0x0F) << 2) - | ((c3 & 0x30) >> 4)); - *++to_nxt = static_cast( - 0xDC00 - | ((c3 & 0x0F) << 6) - | (c4 & 0x3F)); - frm_nxt += 4; - } - else - { - return codecvt_base::error; - } - } - return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok; -} - -static int utf8_to_utf16_length(const uint8_t* frm, const uint8_t* frm_end, size_t mx, unsigned long Maxcode = 0x10FFFF, @@ -3319,7 +3129,7 @@ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { -#if _WIN32 +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 const uint16_t* _frm = reinterpret_cast(frm); const uint16_t* _frm_end = reinterpret_cast(frm_end); const uint16_t* _frm_nxt = _frm; @@ -3331,7 +3141,7 @@ uint8_t* _to = reinterpret_cast(to); uint8_t* _to_end = reinterpret_cast(to_end); uint8_t* _to_nxt = _to; -#if _WIN32 +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 result r = ucs2_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, _Maxcode_, _Mode_); #else @@ -3351,7 +3161,7 @@ const uint8_t* _frm = reinterpret_cast(frm); const uint8_t* _frm_end = reinterpret_cast(frm_end); const uint8_t* _frm_nxt = _frm; -#if _WIN32 +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 uint16_t* _to = reinterpret_cast(to); uint16_t* _to_end = reinterpret_cast(to_end); uint16_t* _to_nxt = _to; @@ -3563,14 +3373,24 @@ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { - const uint32_t* _frm = reinterpret_cast(frm); - const uint32_t* _frm_end = reinterpret_cast(frm_end); - const uint32_t* _frm_nxt = _frm; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + typedef uint16_t FrmType; +#else + typedef uint32_t FrmType; +#endif + const FrmType* _frm = reinterpret_cast(frm); + const FrmType* _frm_end = reinterpret_cast(frm_end); + const FrmType* _frm_nxt = _frm; uint8_t* _to = reinterpret_cast(to); uint8_t* _to_end = reinterpret_cast(to_end); uint8_t* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); +#else result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, _Maxcode_, _Mode_); +#endif frm_nxt = frm + (_frm_nxt - _frm); to_nxt = to + (_to_nxt - _to); return r; @@ -3584,11 +3404,21 @@ const uint8_t* _frm = reinterpret_cast(frm); const uint8_t* _frm_end = reinterpret_cast(frm_end); const uint8_t* _frm_nxt = _frm; - uint32_t* _to = reinterpret_cast(to); - uint32_t* _to_end = reinterpret_cast(to_end); - uint32_t* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + typedef uint16_t ToType; +#else + typedef uint32_t ToType; +#endif + ToType* _to = reinterpret_cast(to); + ToType* _to_end = reinterpret_cast(to_end); + ToType* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); +#else result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, _Maxcode_, _Mode_); +#endif frm_nxt = frm + (_frm_nxt - _frm); to_nxt = to + (_to_nxt - _to); return r; @@ -3638,14 +3468,24 @@ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { - const uint32_t* _frm = reinterpret_cast(frm); - const uint32_t* _frm_end = reinterpret_cast(frm_end); - const uint32_t* _frm_nxt = _frm; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + typedef uint16_t FrmType; +#else + typedef uint32_t FrmType; +#endif + const FrmType* _frm = reinterpret_cast(frm); + const FrmType* _frm_end = reinterpret_cast(frm_end); + const FrmType* _frm_nxt = _frm; uint8_t* _to = reinterpret_cast(to); uint8_t* _to_end = reinterpret_cast(to_end); uint8_t* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); +#else result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, _Maxcode_, _Mode_); +#endif frm_nxt = frm + (_frm_nxt - _frm); to_nxt = to + (_to_nxt - _to); return r; @@ -3659,11 +3499,21 @@ const uint8_t* _frm = reinterpret_cast(frm); const uint8_t* _frm_end = reinterpret_cast(frm_end); const uint8_t* _frm_nxt = _frm; - uint32_t* _to = reinterpret_cast(to); - uint32_t* _to_end = reinterpret_cast(to_end); - uint32_t* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + typedef uint16_t ToType; +#else + typedef uint32_t ToType; +#endif + ToType* _to = reinterpret_cast(to); + ToType* _to_end = reinterpret_cast(to_end); + ToType* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, + _Maxcode_, _Mode_); +#else result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, _Maxcode_, _Mode_); +#endif frm_nxt = frm + (_frm_nxt - _frm); to_nxt = to + (_to_nxt - _to); return r; @@ -4013,9 +3863,14 @@ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt, extern_type* to, extern_type* to_end, extern_type*& to_nxt) const { - const uint32_t* _frm = reinterpret_cast(frm); - const uint32_t* _frm_end = reinterpret_cast(frm_end); - const uint32_t* _frm_nxt = _frm; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + typedef uint16_t FrmType; +#else + typedef uint32_t FrmType; +#endif + const FrmType* _frm = reinterpret_cast(frm); + const FrmType* _frm_end = reinterpret_cast(frm_end); + const FrmType* _frm_nxt = _frm; uint8_t* _to = reinterpret_cast(to); uint8_t* _to_end = reinterpret_cast(to_end); uint8_t* _to_nxt = _to; @@ -4034,9 +3889,14 @@ const uint8_t* _frm = reinterpret_cast(frm); const uint8_t* _frm_end = reinterpret_cast(frm_end); const uint8_t* _frm_nxt = _frm; - uint32_t* _to = reinterpret_cast(to); - uint32_t* _to_end = reinterpret_cast(to_end); - uint32_t* _to_nxt = _to; +#if _WIN32 || __SIZEOF_WCHAR_T__ == 2 + typedef uint16_t ToType; +#else + typedef uint32_t ToType; +#endif + ToType* _to = reinterpret_cast(to); + ToType* _to_end = reinterpret_cast(to_end); + ToType* _to_nxt = _to; result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt, _Maxcode_, _Mode_); frm_nxt = frm + (_frm_nxt - _frm); Index: test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp =================================================================== --- test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp +++ test/std/localization/locale.stdcvt/codecvt_utf16_in.pass.cpp @@ -25,6 +25,14 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { { @@ -35,11 +43,14 @@ wchar_t* wp = nullptr; std::mbstate_t m; const char* np = nullptr; - std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + r = c.in(m, n, n+4, np, &w, &w+1, wp); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+4); assert(w == 0x40003); +#endif n[0] = char(0x10); n[1] = char(0x05); @@ -113,11 +124,14 @@ wchar_t* wp = nullptr; std::mbstate_t m; const char* np = nullptr; - std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + r = c.in(m, n, n+6, np, &w, &w+1, wp); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+6); assert(w == 0x40003); +#endif n[0] = char(0x10); n[1] = char(0x05); @@ -152,11 +166,14 @@ wchar_t* wp = nullptr; std::mbstate_t m; const char* np = nullptr; - std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + r = c.in(m, n, n+4, np, &w, &w+1, wp); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+4); assert(w == 0x40003); +#endif n[1] = char(0x10); n[0] = char(0x05); @@ -232,11 +249,14 @@ wchar_t* wp = nullptr; std::mbstate_t m; const char* np = nullptr; - std::codecvt_base::result r = c.in(m, n, n+6, np, &w, &w+1, wp); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + r = c.in(m, n, n+6, np, &w, &w+1, wp); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+6); assert(w == 0x40003); +#endif n[1] = char(0x10); n[0] = char(0x05); Index: test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp =================================================================== --- test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp +++ test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp @@ -25,17 +25,28 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { { typedef std::codecvt_utf16 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[4] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+4, np); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+4); @@ -43,6 +54,7 @@ assert(n[1] == char(0xC0)); assert(n[2] == char(0xDC)); assert(n[3] == char(0x03)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -51,8 +63,13 @@ assert(np == n+2); assert(n[0] == char(0x10)); assert(n[1] == char(0x05)); +#if SIZEOF_WCHAR_T == 4 assert(n[2] == char(0xDC)); assert(n[3] == char(0x03)); +#else + assert(n[2] == char(0)); + assert(n[3] == char(0)); +#endif w = 0x453; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -61,8 +78,13 @@ assert(np == n+2); assert(n[0] == char(0x04)); assert(n[1] == char(0x53)); +#if SIZEOF_WCHAR_T == 4 assert(n[2] == char(0xDC)); assert(n[3] == char(0x03)); +#else + assert(n[2] == char(0)); + assert(n[3] == char(0)); +#endif w = 0x56; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -71,18 +93,26 @@ assert(np == n+2); assert(n[0] == char(0x00)); assert(n[1] == char(0x56)); +#if SIZEOF_WCHAR_T == 4 assert(n[2] == char(0xDC)); assert(n[3] == char(0x03)); +#else + assert(n[2] == char(0)); + assert(n[3] == char(0)); +#endif } { typedef std::codecvt_utf16 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[4] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+4, np); assert(r == std::codecvt_base::error); assert(wp == &w); assert(np == n); @@ -90,6 +120,7 @@ assert(n[1] == char(0)); assert(n[2] == char(0)); assert(n[3] == char(0)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -124,12 +155,15 @@ { typedef std::codecvt_utf16 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[6] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+6, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+6, np); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+6); @@ -139,6 +173,7 @@ assert(n[3] == char(0xC0)); assert(n[4] == char(0xDC)); assert(n[5] == char(0x03)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+6, np); @@ -149,8 +184,13 @@ assert(n[1] == char(0xFF)); assert(n[2] == char(0x10)); assert(n[3] == char(0x05)); +#if SIZEOF_WCHAR_T == 4 assert(n[4] == char(0xDC)); assert(n[5] == char(0x03)); +#else + assert(n[4] == char(0)); + assert(n[5] == char(0)); +#endif w = 0x453; r = c.out(m, &w, &w+1, wp, n, n+6, np); @@ -161,8 +201,13 @@ assert(n[1] == char(0xFF)); assert(n[2] == char(0x04)); assert(n[3] == char(0x53)); +#if SIZEOF_WCHAR_T == 4 assert(n[4] == char(0xDC)); assert(n[5] == char(0x03)); +#else + assert(n[4] == char(0)); + assert(n[5] == char(0)); +#endif w = 0x56; r = c.out(m, &w, &w+1, wp, n, n+6, np); @@ -173,19 +218,27 @@ assert(n[1] == char(0xFF)); assert(n[2] == char(0x00)); assert(n[3] == char(0x56)); +#if SIZEOF_WCHAR_T == 4 assert(n[4] == char(0xDC)); assert(n[5] == char(0x03)); +#else + assert(n[4] == char(0)); + assert(n[5] == char(0)); +#endif } { typedef std::codecvt_utf16 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[4] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+4, np); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+4); @@ -193,6 +246,7 @@ assert(n[0] == char(0xC0)); assert(n[3] == char(0xDC)); assert(n[2] == char(0x03)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -201,8 +255,13 @@ assert(np == n+2); assert(n[1] == char(0x10)); assert(n[0] == char(0x05)); +#if SIZEOF_WCHAR_T == 4 assert(n[3] == char(0xDC)); assert(n[2] == char(0x03)); +#else + assert(n[3] == char(0)); + assert(n[2] == char(0)); +#endif w = 0x453; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -211,8 +270,13 @@ assert(np == n+2); assert(n[1] == char(0x04)); assert(n[0] == char(0x53)); +#if SIZEOF_WCHAR_T == 4 assert(n[3] == char(0xDC)); assert(n[2] == char(0x03)); +#else + assert(n[3] == char(0)); + assert(n[2] == char(0)); +#endif w = 0x56; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -221,18 +285,26 @@ assert(np == n+2); assert(n[1] == char(0x00)); assert(n[0] == char(0x56)); +#if SIZEOF_WCHAR_T == 4 assert(n[3] == char(0xDC)); assert(n[2] == char(0x03)); +#else + assert(n[3] == char(0)); + assert(n[2] == char(0)); +#endif } { typedef std::codecvt_utf16 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[4] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+4, np); assert(r == std::codecvt_base::error); assert(wp == &w); assert(np == n); @@ -240,6 +312,7 @@ assert(n[0] == char(0)); assert(n[3] == char(0)); assert(n[2] == char(0)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -276,12 +349,15 @@ std::generate_header | std::little_endian)> C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[6] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+6, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+6, np); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+6); @@ -291,6 +367,7 @@ assert(n[2] == char(0xC0)); assert(n[5] == char(0xDC)); assert(n[4] == char(0x03)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+6, np); @@ -301,8 +378,13 @@ assert(n[0] == char(0xFF)); assert(n[3] == char(0x10)); assert(n[2] == char(0x05)); +#if SIZEOF_WCHAR_T == 4 assert(n[5] == char(0xDC)); assert(n[4] == char(0x03)); +#else + assert(n[5] == char(0)); + assert(n[4] == char(0)); +#endif w = 0x453; r = c.out(m, &w, &w+1, wp, n, n+6, np); @@ -313,8 +395,13 @@ assert(n[0] == char(0xFF)); assert(n[3] == char(0x04)); assert(n[2] == char(0x53)); +#if SIZEOF_WCHAR_T == 4 assert(n[5] == char(0xDC)); assert(n[4] == char(0x03)); +#else + assert(n[5] == char(0)); + assert(n[4] == char(0)); +#endif w = 0x56; r = c.out(m, &w, &w+1, wp, n, n+6, np); @@ -325,7 +412,12 @@ assert(n[0] == char(0xFF)); assert(n[3] == char(0x00)); assert(n[2] == char(0x56)); +#if SIZEOF_WCHAR_T == 4 assert(n[5] == char(0xDC)); assert(n[4] == char(0x03)); +#else + assert(n[5] == char(0)); + assert(n[4] == char(0)); +#endif } } Index: test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp =================================================================== --- test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp +++ test/std/localization/locale.stdcvt/codecvt_utf8_in.pass.cpp @@ -25,6 +25,14 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { { @@ -35,11 +43,14 @@ wchar_t* wp = nullptr; std::mbstate_t m; const char* np = nullptr; - std::codecvt_base::result r = c.in(m, n, n+4, np, &w, &w+1, wp); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + r = c.in(m, n, n+4, np, &w, &w+1, wp); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+4); assert(w == 0x40003); +#endif n[0] = char(0xE1); n[1] = char(0x80); @@ -113,11 +124,14 @@ wchar_t* wp = nullptr; std::mbstate_t m; const char* np = nullptr; - std::codecvt_base::result r = c.in(m, n, n+7, np, &w, &w+1, wp); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + r = c.in(m, n, n+7, np, &w, &w+1, wp); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+7); assert(w == 0x40003); +#endif n[0] = char(0xE1); n[1] = char(0x80); Index: test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp =================================================================== --- test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp +++ test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp @@ -25,17 +25,28 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { { typedef std::codecvt_utf8 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[4] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+4, np); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+4); @@ -43,6 +54,7 @@ assert(n[1] == char(0x80)); assert(n[2] == char(0x80)); assert(n[3] == char(0x83)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -52,7 +64,11 @@ assert(n[0] == char(0xE1)); assert(n[1] == char(0x80)); assert(n[2] == char(0x85)); +#if SIZEOF_WCHAR_T == 4 assert(n[3] == char(0x83)); +#else + assert(n[3] == char(0)); +#endif w = 0x453; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -62,7 +78,11 @@ assert(n[0] == char(0xD1)); assert(n[1] == char(0x93)); assert(n[2] == char(0x85)); +#if SIZEOF_WCHAR_T == 4 assert(n[3] == char(0x83)); +#else + assert(n[3] == char(0)); +#endif w = 0x56; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -72,17 +92,24 @@ assert(n[0] == char(0x56)); assert(n[1] == char(0x93)); assert(n[2] == char(0x85)); +#if SIZEOF_WCHAR_T == 4 assert(n[3] == char(0x83)); +#else + assert(n[3] == char(0)); +#endif } { typedef std::codecvt_utf8 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[4] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+4, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+4, np); assert(r == std::codecvt_base::error); assert(wp == &w); assert(np == n); @@ -90,6 +117,7 @@ assert(n[1] == char(0)); assert(n[2] == char(0)); assert(n[3] == char(0)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+4, np); @@ -124,12 +152,15 @@ { typedef std::codecvt_utf8 C; C c; - wchar_t w = 0x40003; + wchar_t w; char n[7] = {0}; const wchar_t* wp = nullptr; std::mbstate_t m; char* np = nullptr; - std::codecvt_base::result r = c.out(m, &w, &w+1, wp, n, n+7, np); + std::codecvt_base::result r; +#if SIZEOF_WCHAR_T == 4 + w = 0x40003; + r = c.out(m, &w, &w+1, wp, n, n+7, np); assert(r == std::codecvt_base::ok); assert(wp == &w+1); assert(np == n+7); @@ -140,6 +171,7 @@ assert(n[4] == char(0x80)); assert(n[5] == char(0x80)); assert(n[6] == char(0x83)); +#endif w = 0x1005; r = c.out(m, &w, &w+1, wp, n, n+7, np); @@ -152,7 +184,11 @@ assert(n[3] == char(0xE1)); assert(n[4] == char(0x80)); assert(n[5] == char(0x85)); +#if SIZEOF_WCHAR_T == 4 assert(n[6] == char(0x83)); +#else + assert(n[6] == char(0)); +#endif w = 0x453; r = c.out(m, &w, &w+1, wp, n, n+7, np); @@ -165,7 +201,11 @@ assert(n[3] == char(0xD1)); assert(n[4] == char(0x93)); assert(n[5] == char(0x85)); +#if SIZEOF_WCHAR_T == 4 assert(n[6] == char(0x83)); +#else + assert(n[6] == char(0)); +#endif w = 0x56; r = c.out(m, &w, &w+1, wp, n, n+7, np); @@ -178,7 +218,11 @@ assert(n[3] == char(0x56)); assert(n[4] == char(0x93)); assert(n[5] == char(0x85)); +#if SIZEOF_WCHAR_T == 4 assert(n[6] == char(0x83)); +#else + assert(n[6] == char(0)); +#endif } { typedef std::codecvt_utf8 C; Index: test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp =================================================================== --- test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp +++ test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp @@ -17,6 +17,14 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { typedef std::codecvt_utf8 Codecvt; @@ -23,6 +31,7 @@ typedef std::wstring_convert Myconv; Myconv myconv; assert(myconv.converted() == 0); +#if SIZEOF_WCHAR_T == 4 std::string bs = myconv.to_bytes(L"\x40003"); assert(myconv.converted() == 1); bs = myconv.to_bytes(L"\x40003\x65"); @@ -29,4 +38,12 @@ assert(myconv.converted() == 2); std::wstring ws = myconv.from_bytes("\xF1\x80\x80\x83"); assert(myconv.converted() == 4); +#else + std::string bs = myconv.to_bytes(L"\x4003"); + assert(myconv.converted() == 1); + bs = myconv.to_bytes(L"\x4003\x65"); + assert(myconv.converted() == 2); + std::wstring ws = myconv.from_bytes("\xE4\x80\x80"); + assert(myconv.converted() == 3); +#endif } Index: test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp =================================================================== --- test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp +++ test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp @@ -20,6 +20,14 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { { @@ -27,6 +35,7 @@ std::string bs("\xF1\x80\x80\x83"); std::wstring ws = myconv.from_bytes('a'); assert(ws == L"a"); +#if SIZEOF_WCHAR_T == 4 ws = myconv.from_bytes(bs.c_str()); assert(ws == L"\x40003"); ws = myconv.from_bytes(bs); @@ -33,6 +42,7 @@ assert(ws == L"\x40003"); ws = myconv.from_bytes(bs.data(), bs.data() + bs.size()); assert(ws == L"\x40003"); +#endif ws = myconv.from_bytes(""); assert(ws.size() == 0); } Index: test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp =================================================================== --- test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp +++ test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp @@ -20,12 +20,22 @@ #include #include +#if _WIN32 + #define SIZEOF_WCHAR_T 2 +#elif __SIZEOF_WCHAR_T__ + #define SIZEOF_WCHAR_T __SIZEOF_WCHAR_T__ +#else + #define SIZEOF_WCHAR_T 4 +#endif + int main() { { std::wstring_convert > myconv; + std::string bs; +#if SIZEOF_WCHAR_T == 4 std::wstring ws(1, L'\x40003'); - std::string bs = myconv.to_bytes(ws[0]); + bs = myconv.to_bytes(ws[0]); assert(bs == "\xF1\x80\x80\x83"); bs = myconv.to_bytes(ws.c_str()); assert(bs == "\xF1\x80\x80\x83"); @@ -33,6 +43,7 @@ assert(bs == "\xF1\x80\x80\x83"); bs = myconv.to_bytes(ws.data(), ws.data() + ws.size()); assert(bs == "\xF1\x80\x80\x83"); +#endif bs = myconv.to_bytes(L""); assert(bs.size() == 0); }