diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -952,6 +952,7 @@ numeric optional ostream + print queue random ranges diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -1282,6 +1282,10 @@ // FIXME: should re-export ios, streambuf? export * } + module print { + header "print" + export * + } module queue { header "queue" export initializer_list diff --git a/libcxx/include/print b/libcxx/include/print new file mode 100644 --- /dev/null +++ b/libcxx/include/print @@ -0,0 +1,112 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_PRINT +#define _LIBCPP_PRINT + +#include <__assert> // all public C++ headers provide the assertion handler +#include <__concepts/same_as.h> +#include <__config> +#include <__format/unicode.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 23 + +# ifndef _LIBCPP_HAS_NO_UNICODE +// This is the code to transcode UTF-8 to UTF-16. This is used on +// Windows for the native Unicode API. The code is modeled to make it +// easier to extend to +// +// P2728R0 Unicode in the Library, Part 1: UTF Transcoding +// +// This paper is still under heavy development so it makes no sense yet +// to strictly follow the paper. +namespace __unicode { + +// The names of these concepts are modelled after P2728R0, but the +// implementation is not. 
char16_t may contain 32-bits so depending on the +// number of bits is an issue. +template +concept __utf8_code_unit = same_as<_Tp, char8_t> || same_as<_Tp, char>; + +# ifdef _LIBCPP_SHORT_WCHAR +template +concept __utf16_code_unit = same_as<_Tp, char16_t> _LIBCPP_IF_WIDE_CHARACTERS(|| same_as<_Tp, wchar_t>); +template +concept __utf32_code_unit = same_as<_Tp, char32_t>; +# else +template +concept __utf16_code_unit = same_as<_Tp, char16_t>; +template +concept __utf32_code_unit = same_as<_Tp, char32_t> _LIBCPP_IF_WIDE_CHARACTERS(|| same_as<_Tp, wchar_t>); +# endif + +// Pass by reference since an output_iterator may not be copyable. +template +_LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt&, char32_t) = delete; + +template + requires __utf16_code_unit> +_LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt& __out_it, char32_t __value) { + _LIBCPP_ASSERT(__is_scalar_value(__value), "an invalid unicode scalar value results in invalid UTF-16"); + + if (__value < 0x10000) { + *__out_it++ = __value; + return; + } + + __value -= 0x10000; + *__out_it++ = 0xd800 + (__value >> 10); + *__out_it++ = 0xdc00 + (__value & 0x3FF); +} + +template + requires __utf32_code_unit> +_LIBCPP_HIDE_FROM_ABI constexpr void __encode(_OutIt& __out_it, char32_t __value) { + _LIBCPP_ASSERT(__is_scalar_value(__value), "an invalid unicode scalar value results in invalid UTF-32"); + *__out_it++ = __value; +} + +template + requires output_iterator<_OutIt, const iter_value_t<_OutIt>&> && (!same_as, iter_value_t<_InIt>>) +_LIBCPP_HIDE_FROM_ABI constexpr _OutIt __transcode(_InIt __first, _InIt __last, _OutIt __out_it) { + // The __code_point_view has a basic_string_view interface. + // When transcoding becomes part of the standard we probably want to + // look at smarter algorithms. 
+ // For example, when processing a code point that is encoded in + // 1 to 3 code units in UTF-8, the result will always be encoded + // in 1 code unit in UTF-16 (code points that require 4 code + // units in UTF-8 will require 2 code units in UTF-16). + // + // Note if P2728 is accepted types like int may become valid. In that case + // the __code_point_view should use a span. Libc++ will remove support for + // char_traits. + basic_string_view> __data{__first, __last}; + __code_point_view> __view{__data.begin(), __data.end()}; + while (!__view.__at_end()) + __unicode::__encode(__out_it, __view.__consume().__code_point); + return __out_it; +} + +} // namespace __unicode + +# endif // _LIBCPP_HAS_NO_UNICODE + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_PRINT diff --git a/libcxx/modules/std/print.cppm b/libcxx/modules/std/print.cppm --- a/libcxx/modules/std/print.cppm +++ b/libcxx/modules/std/print.cppm @@ -8,10 +8,7 @@ //===----------------------------------------------------------------------===// module; -#if __has_include() -# error "include this header unconditionally and uncomment the exported symbols" -# include -#endif +#include export module std:print; export namespace std { diff --git a/libcxx/test/libcxx/input.output/iostream.format/print.fun/transcoding.pass.cpp b/libcxx/test/libcxx/input.output/iostream.format/print.fun/transcoding.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/input.output/iostream.format/print.fun/transcoding.pass.cpp @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// TODO FMT Fix this test using GCC, it currently times out. +// UNSUPPORTED: gcc-12 + +// + +// Tests the UTF-8 to UTF-16/32 encoding. +// UTF-16 is used on Windows to write to the Unicode API. +// UTF-32 is used to test the Windows behaviour on Linux using 32-bit wchar_t. + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "make_string.h" + +#define SV(S) MAKE_STRING_VIEW(CharT, S) + +template +constexpr void test(std::basic_string_view expected, std::string_view input) { + assert(expected.size() < 1024); + std::array buffer; + std::ranges::fill(buffer, CharT('*')); + + CharT* out = std::__unicode::__transcode(input.begin(), input.end(), buffer.data()); + + assert(std::basic_string_view(buffer.data(), out) == expected); + + out = std::find_if(out, buffer.end(), [](CharT c) { return c != CharT('*'); }); + assert(out == buffer.end()); +} + +template +constexpr void test() { + // *** Test valid UTF-8 *** +#define TEST(S) test(SV(S), S) + TEST("hello world"); + // copied from benchmarks/std_format_spec_string_unicode.bench.cpp + TEST("Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. 
Iuvaret fabulas qui ex."); + TEST("Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝusrefērrēnÞur no mel."); + TEST("Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем цонцептам диспутандо"); + TEST("入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。"); + TEST("\U0001f636\u200d\U0001f32b\ufe0f"); +#undef TEST + + // *** Test invalid UTF-8 *** + test(SV("\ufffd"), "\xc3"); + test(SV("\ufffd("), "\xc3\x28"); + + // Surrogate range + test(SV("\ufffd"), "\xed\xa0\x80"); // U+D800 + test(SV("\ufffd"), "\xed\xaf\xbf"); // U+DBFF + test(SV("\ufffd"), "\xed\xbf\x80"); // U+DFC0 + test(SV("\ufffd"), "\xed\xbf\xbf"); // U+DFFF + + // Beyond valid values + test(SV("\ufffd"), "\xf4\x90\x80\x80"); // U+110000 + test(SV("\ufffd"), "\xf4\xbf\xbf\xbf"); // U+13FFFF + + // Validates http://unicode.org/review/pr-121.html option 3. + test(SV("\u0061\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0062"), "\x61\xF1\x80\x80\xE1\x80\xC2\x62"); +} + +constexpr bool test() { + test(); + test(); +#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) + test(); +#endif + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -636,6 +636,12 @@ ostream type_traits ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print version queue compare queue concepts queue cstddef diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -637,6 +637,12 @@ ostream type_traits ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print
version queue compare queue concepts queue cstddef diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -639,6 +639,12 @@ ostream type_traits ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print version queue compare queue concepts queue cstddef diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -639,6 +639,12 @@ ostream type_traits ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print version queue compare queue concepts queue cstddef diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -645,6 +645,12 @@ ostream type_traits ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print version queue compare queue concepts queue cstddef diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -435,6 +435,12 @@ ostream string ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print version queue compare queue cstddef queue cstdint diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ 
b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -435,6 +435,12 @@ ostream string ostream typeinfo ostream version +print cstddef +print cstdint +print initializer_list +print limits +print string_view +print version queue compare queue cstddef queue cstdint diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -247,6 +247,7 @@ --exclude 'locale-specific_form.pass.cpp' \ --exclude 'ostream.pass.cpp' \ --exclude 'std_format_spec_string_unicode.bench.cpp' \ + --exclude 'transcoding.pass.cpp' \ --exclude 'underflow.pass.cpp' \ || false