diff --git a/libcxx/include/wchar.h b/libcxx/include/wchar.h --- a/libcxx/include/wchar.h +++ b/libcxx/include/wchar.h @@ -170,13 +170,13 @@ } #endif -#if defined(__cplusplus) && defined(_LIBCPP_MSVCRT_LIKE) +#if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__)) extern "C" { size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, size_t nmc, size_t len, mbstate_t *__restrict ps); size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, size_t nwc, size_t len, mbstate_t *__restrict ps); -} // extern "C++" -#endif // __cplusplus && _LIBCPP_MSVCRT +} // extern "C" +#endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__) #endif // _LIBCPP_WCHAR_H diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -98,6 +98,8 @@ ) elseif(ZOS) list(APPEND LIBCXX_SOURCES + support/ibm/mbsnrtowcs.cpp + support/ibm/wcsnrtombs.cpp support/ibm/xlocale_zos.cpp ) endif() diff --git a/libcxx/src/support/ibm/mbsnrtowcs.cpp b/libcxx/src/support/ibm/mbsnrtowcs.cpp new file mode 100644 --- /dev/null +++ b/libcxx/src/support/ibm/mbsnrtowcs.cpp @@ -0,0 +1,95 @@ +//===----------------------- mbsnrtowcs.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include // size_t +#include // mbstate_t +#include // MB_LEN_MAX +#include // wmemcpy + +// Returns the number of wide characters found in the multi byte sequence `src` +// (of `src_size_bytes`), that fit in the buffer `dst` (of `max_dest_chars` +// elements size). The count returned excludes the null terminator. +// When `dst` is NULL, no characters are copied to `dst`. +// Returns (size_t) -1 when an invalid sequence is encountered. +// Leaves *`src` pointing to the next character to convert or NULL +// if a null character was converted from *`src`. +_LIBCPP_FUNC_VIS +size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, + size_t src_size_bytes, size_t max_dest_chars, + mbstate_t *__restrict ps) { + const size_t terminated_sequence = static_cast(0); + const size_t invalid_sequence = static_cast(-1); + const size_t incomplete_sequence = static_cast(-2); + + size_t source_converted; + size_t dest_converted; + size_t result = 0; + + // If `dst` is null then `max_dest_chars` should be ignored according to the + // standard. Setting `max_dest_chars` to a large value has this effect. + if (dst == nullptr) + max_dest_chars = static_cast(-1); + + for (dest_converted = source_converted = 0; + source_converted < src_size_bytes && (!dst || dest_converted < max_dest_chars); + ++dest_converted, source_converted += result) { + // Converts one multi byte character. + // If result (char_size) is greater than 0, it's the size in bytes of that character. + // If result (char_size) is zero, it indicates that the null character has been found. + // Otherwise, it's an error and errno may be set. + size_t source_remaining = src_size_bytes - source_converted; + size_t dest_remaining = max_dest_chars - dest_converted; + + if (dst == nullptr) { + result = mbrtowc(NULL, *src + source_converted, source_remaining, ps); + } else if (dest_remaining >= source_remaining) { + // dst has enough space to translate in-place. + result = mbrtowc(dst + dest_converted, *src + source_converted, source_remaining, ps); + } else { + /* + * dst may not have enough space, so use a temporary buffer. + * + * We need to save a copy of the conversion state + * here so we can restore it if the multibyte + * character is too long for the buffer. + */ + wchar_t buff[MB_LEN_MAX]; + mbstate_t mbstate_tmp; + + if (ps != nullptr) + mbstate_tmp = *ps; + result = mbrtowc(buff, *src + source_converted, source_remaining, ps); + + if (result > dest_remaining) { + // Multi-byte sequence for character won't fit. + if (ps != nullptr) + *ps = mbstate_tmp; + break; + } else { + // The buffer was used, so we need copy the translation to dst. + wmemcpy(dst, buff, result); + } + } + + // Don't do anything to change errno from here on. + if (result == invalid_sequence || result == terminated_sequence || result == incomplete_sequence) { + break; + } + } + + if (dst) { + if (result == terminated_sequence) + *src = NULL; + else + *src += source_converted; + } + if (result == invalid_sequence) + return invalid_sequence; + + return dest_converted; +} diff --git a/libcxx/src/support/ibm/wcsnrtombs.cpp b/libcxx/src/support/ibm/wcsnrtombs.cpp new file mode 100644 --- /dev/null +++ b/libcxx/src/support/ibm/wcsnrtombs.cpp @@ -0,0 +1,93 @@ +//===----------------------- wcsnrtombs.cpp -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include // mbstate_t +#include // MB_LEN_MAX +#include // MB_CUR_MAX, size_t +#include // memcpy + +// Converts `max_source_chars` from the wide character buffer pointer to by *`src`, +// into the multi byte character sequence buffer stored at `dst`, which must be +// `dst_size_bytes` bytes in size. Returns the number of bytes in the sequence +// converted from *src, excluding the null terminator. +// Returns (size_t) -1 if an error occurs and sets errno. +// If `dst` is NULL, `dst_size_bytes` is ignored and no bytes are copied to `dst`. +_LIBCPP_FUNC_VIS +size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, + size_t max_source_chars, size_t dst_size_bytes, + mbstate_t *__restrict ps) { + + const size_t invalid_wchar = static_cast(-1); + + size_t source_converted; + size_t dest_converted; + size_t result = 0; + + // If `dst` is null then `dst_size_bytes` should be ignored according to the + // standard. Setting dst_size_bytes to a large value has this effect. + if (dst == nullptr) + dst_size_bytes = static_cast(-1); + + for (dest_converted = source_converted = 0; + source_converted < max_source_chars && (!dst || dest_converted < dst_size_bytes); + ++source_converted, dest_converted += result) { + wchar_t c = (*src)[source_converted]; + size_t dest_remaining = dst_size_bytes - dest_converted; + + if (dst == nullptr) { + result = wcrtomb(NULL, c, ps); + } else if (dest_remaining >= static_cast(MB_CUR_MAX)) { + // dst has enough space to translate in-place. + result = wcrtomb(dst + dest_converted, c, ps); + } else { + /* + * dst may not have enough space, so use a temporary buffer. + * + * We need to save a copy of the conversion state + * here so we can restore it if the multibyte + * character is too long for the buffer. + */ + char buff[MB_LEN_MAX]; + mbstate_t mbstate_tmp; + + if (ps != nullptr) + mbstate_tmp = *ps; + result = wcrtomb(buff, c, ps); + + if (result > dest_remaining) { + // Multi-byte sequence for character won't fit. + if (ps != nullptr) + *ps = mbstate_tmp; + if (result != invalid_wchar) + break; + } else { + // The buffer was used, so we need copy the translation to dst. + memcpy(dst, buff, result); + } + } + + // result (char_size) contains the size of the multi-byte-sequence converted. + // Otherwise, result (char_size) is (size_t) -1 and wcrtomb() sets the errno. + if (result == invalid_wchar) { + if (dst) + *src = *src + source_converted; + return invalid_wchar; + } + + if (c == L'\0') { + if (dst) + *src = NULL; + return dest_converted; + } + } + + if (dst) + *src = *src + source_converted; + + return dest_converted; +}