diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -40,6 +40,7 @@ - P2520R0 - ``move_iterator`` should be a random access iterator - P1328R1 - ``constexpr type_info::operator==()`` - P1413R3 - Formatting ``thread::id`` (the ``stacktrace`` is not done yet) +- P2675R1 - ``format``'s width estimation is too approximate and not forward compatible Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -111,7 +111,7 @@ "`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","|Partial| [#note-P2711R1]_","","|ranges|" "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" "`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" -"`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","","","|format|" +"`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","|Complete|","17.0","|format|" "`P2572R1 `__","LWG", "``std::format`` fill character allowances","February 2023","","","|format|" "`P2693R1 `__","LWG", "Formatting ``thread::id`` and ``stacktrace``","February 2023","|Partial| [#note-P2693R1]_","","|format|" "`P2679R2 `__","LWG", "Fixing ``std::start_lifetime_as`` for arrays","February 2023","","","" diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -11,7 +11,7 @@ "`P2585R0 `__","Improving default container formatting","C++23","Mark de Wever","|Complete|", Clang 17 "`P2539R4 `__","Should the output of ``std::print`` to a terminal be synchronized with the 
underlying stream?","C++23","Mark de Wever" "`P2713R1 `__","Escaping improvements in ``std::format``","C++23","Mark de Wever","" -"`P2675R1 `__","``format``'s width estimation is too approximate and not forward compatible","C++23","Mark de Wever","" +"`P2675R1 `__","``format``'s width estimation is too approximate and not forward compatible","C++23","Mark de Wever","|Complete|", Clang 17 "`P2572R1 `__","``std::format`` fill character allowances","C++23","Mark de Wever","|In progress|" "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``","C++23","Mark de Wever","|In progress|" `P1361 `_,"Integration of chrono with text formatting","C++20",Mark de Wever,|In Progress|, diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -517,3 +517,9 @@ ``format-string`` and ``wformat-string`` became ``basic_format_string``, ``format_string``, and ``wformat_string`` in C++23. Libc++ makes these types available in C++20 as an extension. + +For padding Unicode strings the ``format`` library relies on the Unicode +Standard. Libc++ retroactively updates the Unicode Standard in older C++ +versions. This allows the library to have better estimates for newly introduced +Unicode code points, without requiring the user to use the latest C++ version +in their code base. 
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -357,6 +357,7 @@ __format/range_default_formatter.h __format/range_formatter.h __format/unicode.h + __format/width_estimation_table.h __functional/binary_function.h __functional/binary_negate.h __functional/bind.h diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -28,6 +28,7 @@ #include <__format/format_parse_context.h> #include <__format/format_string.h> #include <__format/unicode.h> +#include <__format/width_estimation_table.h> #include <__iterator/concepts.h> #include <__iterator/readable_traits.h> // iter_value_t #include <__type_traits/common_type.h> @@ -786,57 +787,6 @@ # ifndef _LIBCPP_HAS_NO_UNICODE namespace __detail { - -/// Converts a code point to the column width. -/// -/// The estimations are conforming to [format.string.general]/11 -/// -/// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 -/// character. -_LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept { - _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values"); - - // clang-format off - return 1 + (__c >= 0x1100 && (__c <= 0x115f || - (__c >= 0x2329 && (__c <= 0x232a || - (__c >= 0x2e80 && (__c <= 0x303e || - (__c >= 0x3040 && (__c <= 0xa4cf || - (__c >= 0xac00 && (__c <= 0xd7a3 || - (__c >= 0xf900 && (__c <= 0xfaff || - (__c >= 0xfe10 && (__c <= 0xfe19 || - (__c >= 0xfe30 && (__c <= 0xfe6f || - (__c >= 0xff00 && (__c <= 0xff60 || - (__c >= 0xffe0 && (__c <= 0xffe6 - )))))))))))))))))))); - // clang-format on -} - -/// @overload -/// -/// This version expects a value greater than or equal to 0x1'0000, which is a -/// 4-byte UTF-8 character. 
-_LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept { - _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values"); - - // clang-format off - return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || - (__c >= 0x1'f900 && (__c <= 0x1'f9ff || - (__c >= 0x2'0000 && (__c <= 0x2'fffd || - (__c >= 0x3'0000 && (__c <= 0x3'fffd - )))))))); - // clang-format on -} - -/// @overload -/// -/// The general case, accepting all values. -_LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept { - if (__c < 0x10000) - return __detail::__column_width_3(__c); - - return __detail::__column_width_4(__c); -} - template _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering( _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept { @@ -846,7 +796,7 @@ __column_width_result<_Iterator> __result{0, __first}; while (__result.__last_ != __last && __result.__width_ <= __maximum) { typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); - int __width = __detail::__column_width(__cluster.__code_point_); + int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_); // When the next entry would exceed the maximum width the previous width // might be returned. For example when a width of 100 is requested the diff --git a/libcxx/include/__format/width_estimation_table.h b/libcxx/include/__format/width_estimation_table.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__format/width_estimation_table.h @@ -0,0 +1,271 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_width_estimation_table.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use +// for definitions of Unicode Inc.'s Data Files and Software. +// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of the Unicode data files and any associated documentation +// (the "Data Files") or Unicode software and any associated documentation +// (the "Software") to deal in the Data Files or Software +// without restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, and/or sell copies of +// the Data Files or Software, and to permit persons to whom the Data Files +// or Software are furnished to do so, provided that either +// (a) this copyright and permission notice appear with all copies +// of the Data Files or Software, or +// (b) this copyright and permission notice appear in associated +// Documentation. 
+// +// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT OF THIRD PARTY RIGHTS. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THE DATA FILES OR SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, +// use or other dealings in these Data Files or Software without prior +// written authorization of the copyright holder. + +#ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H +#define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H + +#include <__algorithm/ranges_upper_bound.h> +#include <__config> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +namespace __width_estimation_table { + +/// The entries of the characters with an estimated width of 2. +/// +/// Contains the entries for [format.string.std]/12 +/// - Any code point with the East_Asian_Width="W" or East_Asian_Width="F" +/// Derived Extracted Property as described by UAX #44 +/// - U+4DC0 - U+4DFF (Yijing Hexagram Symbols) +/// - U+1F300 - U+1F5FF (Miscellaneous Symbols and Pictographs) +/// - U+1F900 - U+1F9FF (Supplemental Symbols and Pictographs) +/// +/// The data is generated from +/// - https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt +/// - The "overrides" in [format.string.std]/12 +/// +/// The format of EastAsianWidth.txt is two fields separated by a semicolon. 
+/// Field 0: Unicode code point value or range of code point values +/// Field 1: East_Asian_Width property, consisting of one of the following values: +/// "A", "F", "H", "N", "Na", "W" +/// - All code points, assigned or unassigned, that are not listed +/// explicitly are given the value "N". +/// - The unassigned code points in the following blocks default to "W": +/// CJK Unified Ideographs Extension A: U+3400..U+4DBF +/// CJK Unified Ideographs: U+4E00..U+9FFF +/// CJK Compatibility Ideographs: U+F900..U+FAFF +/// - All undesignated code points in Planes 2 and 3, whether inside or +/// outside of allocated blocks, default to "W": +/// Plane 2: U+20000..U+2FFFD +/// Plane 3: U+30000..U+3FFFD +/// +/// The table is similar to the table +/// __extended_grapheme_custer_property_boundary::__entries +/// which explains the details of these classes. The only difference is this +/// table lacks a property, thus having more bits available for the size. +/// +/// The maximum code point that has an estimated width of 2 is U+3FFFD. This +/// value can be encoded in 18 bits. Thus the upper 3 bits of the code point +/// are always 0. These 3 bits are used to enlarge the offset range. This +/// optimization reduces the table in Unicode 15 from 184 to 108 entries, +/// saving 304 bytes. +/// +/// The data has 2 values: +/// - bits [0, 13] The size of the range minus 1, allowing 16384 elements. +/// - bits [14, 31] The lower bound code point of the range. The upper bound of +/// the range is lower bound + the value of bits [0, 13].
+inline constexpr uint32_t __entries[108] = { + 0x0440005f /* 00001100 - 0000115f [ 96] */, // + 0x08c68001 /* 0000231a - 0000231b [ 2] */, // + 0x08ca4001 /* 00002329 - 0000232a [ 2] */, // + 0x08fa4003 /* 000023e9 - 000023ec [ 4] */, // + 0x08fc0000 /* 000023f0 - 000023f0 [ 1] */, // + 0x08fcc000 /* 000023f3 - 000023f3 [ 1] */, // + 0x097f4001 /* 000025fd - 000025fe [ 2] */, // + 0x09850001 /* 00002614 - 00002615 [ 2] */, // + 0x0992000b /* 00002648 - 00002653 [ 12] */, // + 0x099fc000 /* 0000267f - 0000267f [ 1] */, // + 0x09a4c000 /* 00002693 - 00002693 [ 1] */, // + 0x09a84000 /* 000026a1 - 000026a1 [ 1] */, // + 0x09aa8001 /* 000026aa - 000026ab [ 2] */, // + 0x09af4001 /* 000026bd - 000026be [ 2] */, // + 0x09b10001 /* 000026c4 - 000026c5 [ 2] */, // + 0x09b38000 /* 000026ce - 000026ce [ 1] */, // + 0x09b50000 /* 000026d4 - 000026d4 [ 1] */, // + 0x09ba8000 /* 000026ea - 000026ea [ 1] */, // + 0x09bc8001 /* 000026f2 - 000026f3 [ 2] */, // + 0x09bd4000 /* 000026f5 - 000026f5 [ 1] */, // + 0x09be8000 /* 000026fa - 000026fa [ 1] */, // + 0x09bf4000 /* 000026fd - 000026fd [ 1] */, // + 0x09c14000 /* 00002705 - 00002705 [ 1] */, // + 0x09c28001 /* 0000270a - 0000270b [ 2] */, // + 0x09ca0000 /* 00002728 - 00002728 [ 1] */, // + 0x09d30000 /* 0000274c - 0000274c [ 1] */, // + 0x09d38000 /* 0000274e - 0000274e [ 1] */, // + 0x09d4c002 /* 00002753 - 00002755 [ 3] */, // + 0x09d5c000 /* 00002757 - 00002757 [ 1] */, // + 0x09e54002 /* 00002795 - 00002797 [ 3] */, // + 0x09ec0000 /* 000027b0 - 000027b0 [ 1] */, // + 0x09efc000 /* 000027bf - 000027bf [ 1] */, // + 0x0ac6c001 /* 00002b1b - 00002b1c [ 2] */, // + 0x0ad40000 /* 00002b50 - 00002b50 [ 1] */, // + 0x0ad54000 /* 00002b55 - 00002b55 [ 1] */, // + 0x0ba00019 /* 00002e80 - 00002e99 [ 26] */, // + 0x0ba6c058 /* 00002e9b - 00002ef3 [ 89] */, // + 0x0bc000d5 /* 00002f00 - 00002fd5 [ 214] */, // + 0x0bfc000b /* 00002ff0 - 00002ffb [ 12] */, // + 0x0c00003e /* 00003000 - 0000303e [ 63] */, // + 0x0c104055 /* 00003041 
- 00003096 [ 86] */, // + 0x0c264066 /* 00003099 - 000030ff [ 103] */, // + 0x0c41402a /* 00003105 - 0000312f [ 43] */, // + 0x0c4c405d /* 00003131 - 0000318e [ 94] */, // + 0x0c640053 /* 00003190 - 000031e3 [ 84] */, // + 0x0c7c002e /* 000031f0 - 0000321e [ 47] */, // + 0x0c880027 /* 00003220 - 00003247 [ 40] */, // + 0x0c943fff /* 00003250 - 0000724f [16384] */, // + 0x1c94323c /* 00007250 - 0000a48c [12861] */, // + 0x29240036 /* 0000a490 - 0000a4c6 [ 55] */, // + 0x2a58001c /* 0000a960 - 0000a97c [ 29] */, // + 0x2b002ba3 /* 0000ac00 - 0000d7a3 [11172] */, // + 0x3e4001ff /* 0000f900 - 0000faff [ 512] */, // + 0x3f840009 /* 0000fe10 - 0000fe19 [ 10] */, // + 0x3f8c0022 /* 0000fe30 - 0000fe52 [ 35] */, // + 0x3f950012 /* 0000fe54 - 0000fe66 [ 19] */, // + 0x3f9a0003 /* 0000fe68 - 0000fe6b [ 4] */, // + 0x3fc0405f /* 0000ff01 - 0000ff60 [ 96] */, // + 0x3ff80006 /* 0000ffe0 - 0000ffe6 [ 7] */, // + 0x5bf80004 /* 00016fe0 - 00016fe4 [ 5] */, // + 0x5bfc0001 /* 00016ff0 - 00016ff1 [ 2] */, // + 0x5c0017f7 /* 00017000 - 000187f7 [ 6136] */, // + 0x620004d5 /* 00018800 - 00018cd5 [ 1238] */, // + 0x63400008 /* 00018d00 - 00018d08 [ 9] */, // + 0x6bfc0003 /* 0001aff0 - 0001aff3 [ 4] */, // + 0x6bfd4006 /* 0001aff5 - 0001affb [ 7] */, // + 0x6bff4001 /* 0001affd - 0001affe [ 2] */, // + 0x6c000122 /* 0001b000 - 0001b122 [ 291] */, // + 0x6c4c8000 /* 0001b132 - 0001b132 [ 1] */, // + 0x6c540002 /* 0001b150 - 0001b152 [ 3] */, // + 0x6c554000 /* 0001b155 - 0001b155 [ 1] */, // + 0x6c590003 /* 0001b164 - 0001b167 [ 4] */, // + 0x6c5c018b /* 0001b170 - 0001b2fb [ 396] */, // + 0x7c010000 /* 0001f004 - 0001f004 [ 1] */, // + 0x7c33c000 /* 0001f0cf - 0001f0cf [ 1] */, // + 0x7c638000 /* 0001f18e - 0001f18e [ 1] */, // + 0x7c644009 /* 0001f191 - 0001f19a [ 10] */, // + 0x7c800002 /* 0001f200 - 0001f202 [ 3] */, // + 0x7c84002b /* 0001f210 - 0001f23b [ 44] */, // + 0x7c900008 /* 0001f240 - 0001f248 [ 9] */, // + 0x7c940001 /* 0001f250 - 0001f251 [ 2] */, // + 0x7c980005 /* 
0001f260 - 0001f265 [ 6] */, // + 0x7cc0034f /* 0001f300 - 0001f64f [ 848] */, // + 0x7da00045 /* 0001f680 - 0001f6c5 [ 70] */, // + 0x7db30000 /* 0001f6cc - 0001f6cc [ 1] */, // + 0x7db40002 /* 0001f6d0 - 0001f6d2 [ 3] */, // + 0x7db54002 /* 0001f6d5 - 0001f6d7 [ 3] */, // + 0x7db70003 /* 0001f6dc - 0001f6df [ 4] */, // + 0x7dbac001 /* 0001f6eb - 0001f6ec [ 2] */, // + 0x7dbd0008 /* 0001f6f4 - 0001f6fc [ 9] */, // + 0x7df8000b /* 0001f7e0 - 0001f7eb [ 12] */, // + 0x7dfc0000 /* 0001f7f0 - 0001f7f0 [ 1] */, // + 0x7e4000ff /* 0001f900 - 0001f9ff [ 256] */, // + 0x7e9c000c /* 0001fa70 - 0001fa7c [ 13] */, // + 0x7ea00008 /* 0001fa80 - 0001fa88 [ 9] */, // + 0x7ea4002d /* 0001fa90 - 0001fabd [ 46] */, // + 0x7eafc006 /* 0001fabf - 0001fac5 [ 7] */, // + 0x7eb3800d /* 0001face - 0001fadb [ 14] */, // + 0x7eb80008 /* 0001fae0 - 0001fae8 [ 9] */, // + 0x7ebc0008 /* 0001faf0 - 0001faf8 [ 9] */, // + 0x80003fff /* 00020000 - 00023fff [16384] */, // + 0x90003fff /* 00024000 - 00027fff [16384] */, // + 0xa0003fff /* 00028000 - 0002bfff [16384] */, // + 0xb0003ffd /* 0002c000 - 0002fffd [16382] */, // + 0xc0003fff /* 00030000 - 00033fff [16384] */, // + 0xd0003fff /* 00034000 - 00037fff [16384] */, // + 0xe0003fff /* 00038000 - 0003bfff [16384] */, // + 0xf0003ffd /* 0003c000 - 0003fffd [16382] */}; + +/// The upper bound entry of EastAsianWidth.txt. +/// +/// Values greater than this value may have more than 18 significant bits. +/// They always have a width of 1. This property makes it possible to store +/// the table in its compact form. +inline constexpr uint32_t __table_upper_bound = 0x0003fffd; + +/// Returns the estimated width of a Unicode code point. +/// +/// \pre The code point is a valid Unicode code point. +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept { + // Since __table_upper_bound contains the unshifted range do the + // comparison without shifting. 
+ if (__code_point > __table_upper_bound) [[unlikely]] + return 1; + + // When the code-point is less than the first element in the table + // the lookup is quite expensive. Since quite some scripts are in + // that range, it makes sense to validate that first. + // The std_format_spec_string_unicode benchmark gives a measurable + // improvement. + if (__code_point < (__entries[0] >> 14)) + return 1; + + ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries; + if (__i == 0) + return 1; + + --__i; + uint32_t __upper_bound = (__entries[__i] >> 14) + (__entries[__i] & 0x3fffu); + return 1 + (__code_point <= __upper_bound); +} + +} // namespace __width_estimation_table + +#endif //_LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -932,6 +932,7 @@ module range_default_formatter { private header "__format/range_default_formatter.h" } module range_formatter { private header "__format/range_formatter.h" } module unicode { private header "__format/unicode.h" } + module width_estimation_table { private header "__format/width_estimation_table.h" } } } module forward_list { diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -389,6 +389,7 @@ #include <__format/range_default_formatter.h> // expected-error@*:* {{use of private header from outside its module: '__format/range_default_formatter.h'}} #include <__format/range_formatter.h> // expected-error@*:* {{use of private header from outside its module: '__format/range_formatter.h'}} #include <__format/unicode.h> // expected-error@*:* {{use of private header from outside its module: '__format/unicode.h'}} 
+#include <__format/width_estimation_table.h> // expected-error@*:* {{use of private header from outside its module: '__format/width_estimation_table.h'}} #include <__functional/binary_function.h> // expected-error@*:* {{use of private header from outside its module: '__functional/binary_function.h'}} #include <__functional/binary_negate.h> // expected-error@*:* {{use of private header from outside its module: '__functional/binary_negate.h'}} #include <__functional/bind.h> // expected-error@*:* {{use of private header from outside its module: '__functional/bind.h'}} diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/code_point_width_estimation.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/code_point_width_estimation.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/code_point_width_estimation.pass.cpp @@ -0,0 +1,87 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT Fix this test using GCC, it currently times out. 
+// UNSUPPORTED: gcc-12 + +// + +// Tests the code point width estimation used by std::format per +// [format.string.std]/12. +// +// The estimation table is generated from the data provided by Unicode +// https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt + +#include +#include +#include +#include + +#include "test_macros.h" + +TEST_DIAGNOSTIC_PUSH +TEST_CLANG_DIAGNOSTIC_IGNORED("-Wprivate-header") +#include <__format/width_estimation_table.h> +TEST_DIAGNOSTIC_POP + +// [format.string.std]/12 +// +// - U+4DC0 - U+4DFF (Yijing Hexagram Symbols) +// - U+1F300 - U+1F5FF (Miscellaneous Symbols and Pictographs) +// - U+1F900 - U+1F9FF (Supplemental Symbols and Pictographs) +static void constexpr test_hardcoded_values() { + auto test = [](char32_t c) { assert(std::__width_estimation_table::__estimated_width(c) == 2); }; + for (char32_t c = 0x4DC0; c <= 0x4DFF; ++c) + test(c); + for (char32_t c = 0x1F300; c <= 0x1F5FF; ++c) + test(c); + for (char32_t c = 0x1F900; c <= 0x1F9FF; ++c) + test(c); +} + +static void constexpr test_invalid_values() { + auto test = [](char32_t c) { assert(std::__width_estimation_table::__estimated_width(c) == 1); }; + + // The surrogate range + for (char32_t c = 0xD800; c <= 0xDFFF; ++c) + test(c); + + // The first 256 non valid code points + for (char32_t c = 0x110000; c <= 0x1100FF; ++c) + test(c); +} + +static void constexpr test_optimization_boundaries() { + // Entries after the table have a width of 1. 
+ static_assert(std::__width_estimation_table::__entries[0] >> 14 == 0x1100, + "validate whether the optimizations in __estimated_width are still valid"); + assert(std::__width_estimation_table::__estimated_width(0x10FF) == 1); + assert(std::__width_estimation_table::__estimated_width(0x1100) == 2); +} + +static constexpr bool test() { + test_hardcoded_values(); + test_invalid_values(); + test_optimization_boundaries(); + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp --- a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp @@ -81,8 +81,8 @@ check(SV("*\u2e80*"), SV("{:*^4}"), SV("\u2e80")); // CJK RADICAL REPEAT check(SV("*\u303e*"), SV("{:*^4}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR - check(SV("*\u3040*"), SV("{:*^4}"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A - check(SV("*\ua4cf*"), SV("{:*^4}"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA + check(SV("*\u3041*"), SV("{:*^4}"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A + check(SV("*\ua4d0*"), SV("{:*^3}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA check(SV("*\uac00*"), SV("{:*^4}"), SV("\uac00")); // check(SV("*\ud7a3*"), SV("{:*^4}"), SV("\ud7a3")); // Hangul Syllable Hih @@ -94,9 +94,9 @@ check(SV("*\ufe19*"), SV("{:*^4}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS check(SV("*\ufe30*"), SV("{:*^4}"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - check(SV("*\ufe6f*"), SV("{:*^4}"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM + check(SV("*\ufe70*"), SV("{:*^3}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM - check(SV("*\uff00*"), SV("{:*^4}"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK + check(SV("*\uff01*"), SV("{:*^4}"), 
SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK check(SV("*\uff60*"), SV("{:*^4}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS check(SV("*\uffe0*"), SV("{:*^4}"), SV("\uffe0")); // FULLWIDTH CENT SIGN @@ -143,8 +143,8 @@ check(SV("***"), SV("{:*^3.1}"), SV("\u2e80")); // CJK RADICAL REPEAT check(SV("***"), SV("{:*^3.1}"), SV("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR - check(SV("***"), SV("{:*^3.1}"), SV("\u3040")); // U+3041 HIRAGANA LETTER SMALL A - check(SV("***"), SV("{:*^3.1}"), SV("\ua4cf")); // U+A4D0 LISU LETTER BA + check(SV("***"), SV("{:*^3.1}"), SV("\u3041")); // U+3041 HIRAGANA LETTER SMALL A + check(SV("*\ua4d0*"), SV("{:*^3.1}"), SV("\ua4d0")); // U+A4D0 LISU LETTER BA check(SV("***"), SV("{:*^3.1}"), SV("\uac00")); // check(SV("***"), SV("{:*^3.1}"), SV("\ud7a3")); // Hangul Syllable Hih @@ -156,9 +156,9 @@ check(SV("***"), SV("{:*^3.1}"), SV("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS check(SV("***"), SV("{:*^3.1}"), SV("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER - check(SV("***"), SV("{:*^3.1}"), SV("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM + check(SV("*\ufe70*"), SV("{:*^3.1}"), SV("\ufe70")); // U+FE70 ARABIC FATHATAN ISOLATED FORM - check(SV("***"), SV("{:*^3.1}"), SV("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK + check(SV("***"), SV("{:*^3.1}"), SV("\uff01")); // U+FF01 FULLWIDTH EXCLAMATION MARK check(SV("***"), SV("{:*^3.1}"), SV("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS check(SV("***"), SV("{:*^3.1}"), SV("\uffe0")); // FULLWIDTH CENT SIGN diff --git a/libcxx/utils/CMakeLists.txt b/libcxx/utils/CMakeLists.txt --- a/libcxx/utils/CMakeLists.txt +++ b/libcxx/utils/CMakeLists.txt @@ -32,6 +32,13 @@ "${LIBCXX_SOURCE_DIR}/include/__format/escaped_output_table.h" COMMENT "Generate the escaped output header") +add_custom_target(libcxx-generate-width-estimation-table + COMMAND + "${Python3_EXECUTABLE}" + "${LIBCXX_SOURCE_DIR}/utils/generate_width_estimation_table.py" + 
"${LIBCXX_SOURCE_DIR}/include/__format/width_estimation_table.h" + COMMENT "Generate the width estimation header") + add_custom_target(libcxx-generate-iwyu-mapping COMMAND "${Python3_EXECUTABLE}" @@ -45,5 +52,6 @@ libcxx-generate-extended-grapheme-cluster-tables libcxx-generate-extended-grapheme-cluster-tests libcxx-generate-escaped-output-table + libcxx-generate-width-estimation-table libcxx-generate-iwyu-mapping COMMENT "Create all the auto-generated files in libc++ and its tests.") diff --git a/libcxx/utils/data/unicode/EastAsianWidth.txt b/libcxx/utils/data/unicode/EastAsianWidth.txt new file mode 100644 --- /dev/null +++ b/libcxx/utils/data/unicode/EastAsianWidth.txt @@ -0,0 +1,2619 @@ +# EastAsianWidth-15.0.0.txt +# Date: 2022-05-24, 17:40:20 GMT [KW, LI] +# © 2022 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# East_Asian_Width Property +# +# This file is a normative contributory data file in the +# Unicode Character Database. +# +# The format is two fields separated by a semicolon. +# Field 0: Unicode code point value or range of code point values +# Field 1: East_Asian_Width property, consisting of one of the following values: +# "A", "F", "H", "N", "Na", "W" +# - All code points, assigned or unassigned, that are not listed +# explicitly are given the value "N". 
+# - The unassigned code points in the following blocks default to "W": +# CJK Unified Ideographs Extension A: U+3400..U+4DBF +# CJK Unified Ideographs: U+4E00..U+9FFF +# CJK Compatibility Ideographs: U+F900..U+FAFF +# - All undesignated code points in Planes 2 and 3, whether inside or +# outside of allocated blocks, default to "W": +# Plane 2: U+20000..U+2FFFD +# Plane 3: U+30000..U+3FFFD +# +# Character ranges are specified as for other property files in the +# Unicode Character Database. +# +# For legacy reasons, there are no spaces before or after the semicolon +# which separates the two fields. The comments following the number sign +# "#" list the General_Category property value or the L& alias of the +# derived value LC, the Unicode character name or names, and, in lines +# with ranges of code points, the code point count in square brackets. +# +# For more information, see UAX #11: East Asian Width, +# at https://www.unicode.org/reports/tr11/ +# +# @missing: 0000..10FFFF; N +0000..001F;N # Cc [32] .. 
+0020;Na # Zs SPACE +0021..0023;Na # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024;Na # Sc DOLLAR SIGN +0025..0027;Na # Po [3] PERCENT SIGN..APOSTROPHE +0028;Na # Ps LEFT PARENTHESIS +0029;Na # Pe RIGHT PARENTHESIS +002A;Na # Po ASTERISK +002B;Na # Sm PLUS SIGN +002C;Na # Po COMMA +002D;Na # Pd HYPHEN-MINUS +002E..002F;Na # Po [2] FULL STOP..SOLIDUS +0030..0039;Na # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B;Na # Po [2] COLON..SEMICOLON +003C..003E;Na # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040;Na # Po [2] QUESTION MARK..COMMERCIAL AT +0041..005A;Na # Lu [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005B;Na # Ps LEFT SQUARE BRACKET +005C;Na # Po REVERSE SOLIDUS +005D;Na # Pe RIGHT SQUARE BRACKET +005E;Na # Sk CIRCUMFLEX ACCENT +005F;Na # Pc LOW LINE +0060;Na # Sk GRAVE ACCENT +0061..007A;Na # Ll [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +007B;Na # Ps LEFT CURLY BRACKET +007C;Na # Sm VERTICAL LINE +007D;Na # Pe RIGHT CURLY BRACKET +007E;Na # Sm TILDE +007F;N # Cc +0080..009F;N # Cc [32] .. 
+00A0;N # Zs NO-BREAK SPACE +00A1;A # Po INVERTED EXCLAMATION MARK +00A2..00A3;Na # Sc [2] CENT SIGN..POUND SIGN +00A4;A # Sc CURRENCY SIGN +00A5;Na # Sc YEN SIGN +00A6;Na # So BROKEN BAR +00A7;A # Po SECTION SIGN +00A8;A # Sk DIAERESIS +00A9;N # So COPYRIGHT SIGN +00AA;A # Lo FEMININE ORDINAL INDICATOR +00AB;N # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC;Na # Sm NOT SIGN +00AD;A # Cf SOFT HYPHEN +00AE;A # So REGISTERED SIGN +00AF;Na # Sk MACRON +00B0;A # So DEGREE SIGN +00B1;A # Sm PLUS-MINUS SIGN +00B2..00B3;A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4;A # Sk ACUTE ACCENT +00B5;N # Ll MICRO SIGN +00B6..00B7;A # Po [2] PILCROW SIGN..MIDDLE DOT +00B8;A # Sk CEDILLA +00B9;A # No SUPERSCRIPT ONE +00BA;A # Lo MASCULINE ORDINAL INDICATOR +00BB;N # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE;A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF;A # Po INVERTED QUESTION MARK +00C0..00C5;N # Lu [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE +00C6;A # Lu LATIN CAPITAL LETTER AE +00C7..00CF;N # Lu [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS +00D0;A # Lu LATIN CAPITAL LETTER ETH +00D1..00D6;N # Lu [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D7;A # Sm MULTIPLICATION SIGN +00D8;A # Lu LATIN CAPITAL LETTER O WITH STROKE +00D9..00DD;N # Lu [5] LATIN CAPITAL LETTER U WITH GRAVE..LATIN CAPITAL LETTER Y WITH ACUTE +00DE..00E1;A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE +00E2..00E5;N # Ll [4] LATIN SMALL LETTER A WITH CIRCUMFLEX..LATIN SMALL LETTER A WITH RING ABOVE +00E6;A # Ll LATIN SMALL LETTER AE +00E7;N # Ll LATIN SMALL LETTER C WITH CEDILLA +00E8..00EA;A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX +00EB;N # Ll LATIN SMALL LETTER E WITH DIAERESIS +00EC..00ED;A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE +00EE..00EF;N # Ll [2] LATIN 
SMALL LETTER I WITH CIRCUMFLEX..LATIN SMALL LETTER I WITH DIAERESIS +00F0;A # Ll LATIN SMALL LETTER ETH +00F1;N # Ll LATIN SMALL LETTER N WITH TILDE +00F2..00F3;A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE +00F4..00F6;N # Ll [3] LATIN SMALL LETTER O WITH CIRCUMFLEX..LATIN SMALL LETTER O WITH DIAERESIS +00F7;A # Sm DIVISION SIGN +00F8..00FA;A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE +00FB;N # Ll LATIN SMALL LETTER U WITH CIRCUMFLEX +00FC;A # Ll LATIN SMALL LETTER U WITH DIAERESIS +00FD;N # Ll LATIN SMALL LETTER Y WITH ACUTE +00FE;A # Ll LATIN SMALL LETTER THORN +00FF;N # Ll LATIN SMALL LETTER Y WITH DIAERESIS +0100;N # Lu LATIN CAPITAL LETTER A WITH MACRON +0101;A # Ll LATIN SMALL LETTER A WITH MACRON +0102..0110;N # L& [15] LATIN CAPITAL LETTER A WITH BREVE..LATIN CAPITAL LETTER D WITH STROKE +0111;A # Ll LATIN SMALL LETTER D WITH STROKE +0112;N # Lu LATIN CAPITAL LETTER E WITH MACRON +0113;A # Ll LATIN SMALL LETTER E WITH MACRON +0114..011A;N # L& [7] LATIN CAPITAL LETTER E WITH BREVE..LATIN CAPITAL LETTER E WITH CARON +011B;A # Ll LATIN SMALL LETTER E WITH CARON +011C..0125;N # L& [10] LATIN CAPITAL LETTER G WITH CIRCUMFLEX..LATIN SMALL LETTER H WITH CIRCUMFLEX +0126..0127;A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE +0128..012A;N # L& [3] LATIN CAPITAL LETTER I WITH TILDE..LATIN CAPITAL LETTER I WITH MACRON +012B;A # Ll LATIN SMALL LETTER I WITH MACRON +012C..0130;N # L& [5] LATIN CAPITAL LETTER I WITH BREVE..LATIN CAPITAL LETTER I WITH DOT ABOVE +0131..0133;A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ +0134..0137;N # L& [4] LATIN CAPITAL LETTER J WITH CIRCUMFLEX..LATIN SMALL LETTER K WITH CEDILLA +0138;A # Ll LATIN SMALL LETTER KRA +0139..013E;N # L& [6] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER L WITH CARON +013F..0142;A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE +0143;N # Lu LATIN CAPITAL 
LETTER N WITH ACUTE +0144;A # Ll LATIN SMALL LETTER N WITH ACUTE +0145..0147;N # L& [3] LATIN CAPITAL LETTER N WITH CEDILLA..LATIN CAPITAL LETTER N WITH CARON +0148..014B;A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG +014C;N # Lu LATIN CAPITAL LETTER O WITH MACRON +014D;A # Ll LATIN SMALL LETTER O WITH MACRON +014E..0151;N # L& [4] LATIN CAPITAL LETTER O WITH BREVE..LATIN SMALL LETTER O WITH DOUBLE ACUTE +0152..0153;A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE +0154..0165;N # L& [18] LATIN CAPITAL LETTER R WITH ACUTE..LATIN SMALL LETTER T WITH CARON +0166..0167;A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE +0168..016A;N # L& [3] LATIN CAPITAL LETTER U WITH TILDE..LATIN CAPITAL LETTER U WITH MACRON +016B;A # Ll LATIN SMALL LETTER U WITH MACRON +016C..017F;N # L& [20] LATIN CAPITAL LETTER U WITH BREVE..LATIN SMALL LETTER LONG S +0180..01BA;N # L& [59] LATIN SMALL LETTER B WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB;N # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF;N # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3;N # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..01CD;N # L& [10] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON +01CE;A # Ll LATIN SMALL LETTER A WITH CARON +01CF;N # Lu LATIN CAPITAL LETTER I WITH CARON +01D0;A # Ll LATIN SMALL LETTER I WITH CARON +01D1;N # Lu LATIN CAPITAL LETTER O WITH CARON +01D2;A # Ll LATIN SMALL LETTER O WITH CARON +01D3;N # Lu LATIN CAPITAL LETTER U WITH CARON +01D4;A # Ll LATIN SMALL LETTER U WITH CARON +01D5;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D6;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D7;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D8;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01D9;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DA;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DB;N # Lu LATIN CAPITAL 
LETTER U WITH DIAERESIS AND GRAVE +01DC;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE +01DD..024F;N # L& [115] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER Y WITH STROKE +0250;N # Ll LATIN SMALL LETTER TURNED A +0251;A # Ll LATIN SMALL LETTER ALPHA +0252..0260;N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK +0261;A # Ll LATIN SMALL LETTER SCRIPT G +0262..0293;N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL +0294;N # Lo LATIN LETTER GLOTTAL STOP +0295..02AF;N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1;N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C3;N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD +02C4;A # Sk MODIFIER LETTER UP ARROWHEAD +02C5;N # Sk MODIFIER LETTER DOWN ARROWHEAD +02C6;N # Lm MODIFIER LETTER CIRCUMFLEX ACCENT +02C7;A # Lm CARON +02C8;N # Lm MODIFIER LETTER VERTICAL LINE +02C9..02CB;A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT +02CC;N # Lm MODIFIER LETTER LOW VERTICAL LINE +02CD;A # Lm MODIFIER LETTER LOW MACRON +02CE..02CF;N # Lm [2] MODIFIER LETTER LOW GRAVE ACCENT..MODIFIER LETTER LOW ACUTE ACCENT +02D0;A # Lm MODIFIER LETTER TRIANGULAR COLON +02D1;N # Lm MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02D7;N # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN +02D8..02DB;A # Sk [4] BREVE..OGONEK +02DC;N # Sk SMALL TILDE +02DD;A # Sk DOUBLE ACUTE ACCENT +02DE;N # Sk MODIFIER LETTER RHOTIC HOOK +02DF;A # Sk MODIFIER LETTER CROSS ACCENT +02E0..02E4;N # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB;N # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC;N # Lm MODIFIER LETTER VOICING +02ED;N # Sk MODIFIER LETTER UNASPIRATED +02EE;N # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF;N # Sk [17] MODIFIER LETTER LOW DOWN 
ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..036F;A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0370..0373;N # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374;N # Lm GREEK NUMERAL SIGN +0375;N # Sk GREEK LOWER NUMERAL SIGN +0376..0377;N # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A;N # Lm GREEK YPOGEGRAMMENI +037B..037D;N # Ll [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037E;N # Po GREEK QUESTION MARK +037F;N # Lu GREEK CAPITAL LETTER YOT +0384..0385;N # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0386;N # Lu GREEK CAPITAL LETTER ALPHA WITH TONOS +0387;N # Po GREEK ANO TELEIA +0388..038A;N # Lu [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C;N # Lu GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..0390;N # L& [3] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391..03A1;A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03A9;A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA +03AA..03B0;N # L& [7] GREEK CAPITAL LETTER IOTA WITH DIALYTIKA..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03B1..03C1;A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO +03C2;N # Ll GREEK SMALL LETTER FINAL SIGMA +03C3..03C9;A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA +03CA..03F5;N # L& [44] GREEK SMALL LETTER IOTA WITH DIALYTIKA..GREEK LUNATE EPSILON SYMBOL +03F6;N # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..03FF;N # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400;N # Lu CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401;A # Lu CYRILLIC CAPITAL LETTER IO +0402..040F;N # Lu [14] CYRILLIC CAPITAL LETTER DJE..CYRILLIC CAPITAL LETTER DZHE +0410..044F;A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA +0450;N # Ll CYRILLIC SMALL LETTER IE WITH 
GRAVE +0451;A # Ll CYRILLIC SMALL LETTER IO +0452..0481;N # L& [48] CYRILLIC SMALL LETTER DJE..CYRILLIC SMALL LETTER KOPPA +0482;N # So CYRILLIC THOUSANDS SIGN +0483..0487;N # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489;N # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +048A..04FF;N # L& [118] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER HA WITH STROKE +0500..052F;N # L& [48] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556;N # Lu [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559;N # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F;N # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588;N # Ll [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589;N # Po ARMENIAN FULL STOP +058A;N # Pd ARMENIAN HYPHEN +058D..058E;N # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +058F;N # Sc ARMENIAN DRAM SIGN +0591..05BD;N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BE;N # Pd HEBREW PUNCTUATION MAQAF +05BF;N # Mn HEBREW POINT RAFE +05C0;N # Po HEBREW PUNCTUATION PASEQ +05C1..05C2;N # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C3;N # Po HEBREW PUNCTUATION SOF PASUQ +05C4..05C5;N # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C6;N # Po HEBREW PUNCTUATION NUN HAFUKHA +05C7;N # Mn HEBREW POINT QAMATS QATAN +05D0..05EA;N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2;N # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4;N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +0600..0605;N # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0606..0608;N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A;N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B;N # Sc AFGHANI SIGN +060C..060D;N # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR +060E..060F;N # So [2] 
ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +0610..061A;N # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061B;N # Po ARABIC SEMICOLON +061C;N # Cf ARABIC LETTER MARK +061D..061F;N # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F;N # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640;N # Lm ARABIC TATWEEL +0641..064A;N # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..065F;N # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0660..0669;N # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066A..066D;N # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F;N # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670;N # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3;N # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4;N # Po ARABIC FULL STOP +06D5;N # Lo ARABIC LETTER AE +06D6..06DC;N # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DD;N # Cf ARABIC END OF AYAH +06DE;N # So ARABIC START OF RUB EL HIZB +06DF..06E4;N # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6;N # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8;N # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06E9;N # So ARABIC PLACE OF SAJDAH +06EA..06ED;N # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF;N # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9;N # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC;N # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE;N # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF;N # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D;N # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070F;N # Cf SYRIAC ABBREVIATION MARK +0710;N # Lo SYRIAC LETTER ALAPH +0711;N # Mn 
SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F;N # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A;N # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..074F;N # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE +0750..077F;N # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +0780..07A5;N # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU +07A6..07B0;N # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1;N # Lo THAANA LETTER NAA +07C0..07C9;N # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA;N # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3;N # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5;N # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6;N # So NKO SYMBOL OO DENNEN +07F7..07F9;N # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA;N # Lm NKO LAJANYALAN +07FD;N # Mn NKO DANTAYALAN +07FE..07FF;N # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815;N # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819;N # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A;N # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823;N # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824;N # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827;N # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828;N # Lm SAMARITAN MODIFIER LETTER I +0829..082D;N # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0830..083E;N # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0840..0858;N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B;N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +085E;N # Po MANDAIC PUNCTUATION +0860..086A;N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887;N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888;N # Sk ARABIC RAISED ROUND DOT +0889..088E;N # Lo [6] ARABIC LETTER NOON 
WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0890..0891;N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +0898..089F;N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08A0..08C8;N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9;N # Lm ARABIC SMALL FARSI YEH +08CA..08E1;N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E2;N # Cf ARABIC DISPUTED END OF AYAH +08E3..08FF;N # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA +0900..0902;N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0903;N # Mc DEVANAGARI SIGN VISARGA +0904..0939;N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A;N # Mn DEVANAGARI VOWEL SIGN OE +093B;N # Mc DEVANAGARI VOWEL SIGN OOE +093C;N # Mn DEVANAGARI SIGN NUKTA +093D;N # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940;N # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948;N # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C;N # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D;N # Mn DEVANAGARI SIGN VIRAMA +094E..094F;N # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950;N # Lo DEVANAGARI OM +0951..0957;N # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0958..0961;N # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963;N # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0964..0965;N # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F;N # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970;N # Po DEVANAGARI ABBREVIATION SIGN +0971;N # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..097F;N # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA +0980;N # Lo BENGALI ANJI +0981;N # Mn BENGALI SIGN CANDRABINDU +0982..0983;N # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C;N # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990;N # 
Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8;N # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0;N # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2;N # Lo BENGALI LETTER LA +09B6..09B9;N # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC;N # Mn BENGALI SIGN NUKTA +09BD;N # Lo BENGALI SIGN AVAGRAHA +09BE..09C0;N # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4;N # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8;N # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC;N # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD;N # Mn BENGALI SIGN VIRAMA +09CE;N # Lo BENGALI LETTER KHANDA TA +09D7;N # Mc BENGALI AU LENGTH MARK +09DC..09DD;N # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1;N # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3;N # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF;N # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1;N # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3;N # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9;N # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA;N # So BENGALI ISSHAR +09FB;N # Sc BENGALI GANDA MARK +09FC;N # Lo BENGALI LETTER VEDIC ANUSVARA +09FD;N # Po BENGALI ABBREVIATION SIGN +09FE;N # Mn BENGALI SANDHI MARK +0A01..0A02;N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03;N # Mc GURMUKHI SIGN VISARGA +0A05..0A0A;N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10;N # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28;N # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30;N # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33;N # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36;N # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39;N # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C;N # Mn GURMUKHI SIGN NUKTA +0A3E..0A40;N 
# Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42;N # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48;N # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D;N # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51;N # Mn GURMUKHI SIGN UDAAT +0A59..0A5C;N # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E;N # Lo GURMUKHI LETTER FA +0A66..0A6F;N # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71;N # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74;N # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75;N # Mn GURMUKHI SIGN YAKASH +0A76;N # Po GURMUKHI ABBREVIATION SIGN +0A81..0A82;N # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83;N # Mc GUJARATI SIGN VISARGA +0A85..0A8D;N # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91;N # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8;N # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0;N # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3;N # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9;N # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC;N # Mn GUJARATI SIGN NUKTA +0ABD;N # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0;N # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5;N # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8;N # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9;N # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC;N # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD;N # Mn GUJARATI SIGN VIRAMA +0AD0;N # Lo GUJARATI OM +0AE0..0AE1;N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3;N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF;N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0;N # Po GUJARATI ABBREVIATION SIGN +0AF1;N # Sc GUJARATI RUPEE SIGN +0AF9;N # Lo GUJARATI LETTER ZHA +0AFA..0AFF;N # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 
+0B01;N # Mn ORIYA SIGN CANDRABINDU +0B02..0B03;N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C;N # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10;N # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28;N # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30;N # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33;N # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39;N # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C;N # Mn ORIYA SIGN NUKTA +0B3D;N # Lo ORIYA SIGN AVAGRAHA +0B3E;N # Mc ORIYA VOWEL SIGN AA +0B3F;N # Mn ORIYA VOWEL SIGN I +0B40;N # Mc ORIYA VOWEL SIGN II +0B41..0B44;N # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48;N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C;N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D;N # Mn ORIYA SIGN VIRAMA +0B55..0B56;N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57;N # Mc ORIYA AU LENGTH MARK +0B5C..0B5D;N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61;N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63;N # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F;N # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70;N # So ORIYA ISSHAR +0B71;N # Lo ORIYA LETTER WA +0B72..0B77;N # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B82;N # Mn TAMIL SIGN ANUSVARA +0B83;N # Lo TAMIL SIGN VISARGA +0B85..0B8A;N # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90;N # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95;N # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A;N # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C;N # Lo TAMIL LETTER JA +0B9E..0B9F;N # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4;N # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA;N # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9;N # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF;N # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0;N # Mn TAMIL VOWEL SIGN II +0BC1..0BC2;N # Mc [2] TAMIL 
VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8;N # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC;N # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD;N # Mn TAMIL SIGN VIRAMA +0BD0;N # Lo TAMIL OM +0BD7;N # Mc TAMIL AU LENGTH MARK +0BE6..0BEF;N # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2;N # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8;N # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9;N # Sc TAMIL RUPEE SIGN +0BFA;N # So TAMIL NUMBER SIGN +0C00;N # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03;N # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04;N # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C;N # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10;N # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28;N # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39;N # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C;N # Mn TELUGU SIGN NUKTA +0C3D;N # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40;N # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44;N # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48;N # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D;N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56;N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A;N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D;N # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61;N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63;N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F;N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77;N # Po TELUGU SIGN SIDDHAM +0C78..0C7E;N # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F;N # So TELUGU SIGN TUUMU +0C80;N # Lo KANNADA SIGN SPACING CANDRABINDU +0C81;N # Mn KANNADA SIGN CANDRABINDU +0C82..0C83;N # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84;N # Po KANNADA SIGN SIDDHAM 
+0C85..0C8C;N # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90;N # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8;N # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3;N # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9;N # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC;N # Mn KANNADA SIGN NUKTA +0CBD;N # Lo KANNADA SIGN AVAGRAHA +0CBE;N # Mc KANNADA VOWEL SIGN AA +0CBF;N # Mn KANNADA VOWEL SIGN I +0CC0..0CC4;N # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6;N # Mn KANNADA VOWEL SIGN E +0CC7..0CC8;N # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB;N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD;N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6;N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE;N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1;N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3;N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF;N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2;N # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0CF3;N # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT +0D00..0D01;N # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03;N # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C;N # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10;N # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A;N # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C;N # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D;N # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40;N # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44;N # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48;N # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C;N # Mc [3] 
MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D;N # Mn MALAYALAM SIGN VIRAMA +0D4E;N # Lo MALAYALAM LETTER DOT REPH +0D4F;N # So MALAYALAM SIGN PARA +0D54..0D56;N # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57;N # Mc MALAYALAM AU LENGTH MARK +0D58..0D5E;N # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61;N # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63;N # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F;N # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78;N # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79;N # So MALAYALAM DATE MARK +0D7A..0D7F;N # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81;N # Mn SINHALA SIGN CANDRABINDU +0D82..0D83;N # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96;N # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1;N # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB;N # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD;N # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6;N # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA;N # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1;N # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4;N # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6;N # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF;N # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF;N # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3;N # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4;N # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30;N # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31;N # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33;N # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER 
SARA AM +0E34..0E3A;N # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E3F;N # Sc THAI CURRENCY SYMBOL BAHT +0E40..0E45;N # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46;N # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E;N # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E4F;N # Po THAI CHARACTER FONGMAN +0E50..0E59;N # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B;N # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82;N # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84;N # Lo LAO LETTER KHO TAM +0E86..0E8A;N # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3;N # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5;N # Lo LAO LETTER LO LOOT +0EA7..0EB0;N # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1;N # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3;N # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC;N # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD;N # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4;N # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6;N # Lm LAO KO LA +0EC8..0ECE;N # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN +0ED0..0ED9;N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF;N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00;N # Lo TIBETAN SYLLABLE OM +0F01..0F03;N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12;N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13;N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14;N # Po TIBETAN MARK GTER TSHEG +0F15..0F17;N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F18..0F19;N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F1A..0F1F;N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29;N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33;N # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34;N # So TIBETAN MARK BSDUS RTAGS +0F35;N # 
Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F36;N # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F37;N # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F38;N # So TIBETAN MARK CHE MGO +0F39;N # Mn TIBETAN MARK TSA -PHRU +0F3A;N # Ps TIBETAN MARK GUG RTAGS GYON +0F3B;N # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C;N # Ps TIBETAN MARK ANG KHANG GYON +0F3D;N # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F;N # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47;N # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C;N # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E;N # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F;N # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84;N # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F85;N # Po TIBETAN MARK PALUTA +0F86..0F87;N # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C;N # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97;N # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC;N # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FBE..0FC5;N # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC6;N # Mn TIBETAN SYMBOL PADMA GDAN +0FC7..0FCC;N # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF;N # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4;N # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8;N # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA;N # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A;N # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C;N # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030;N # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031;N # Mc MYANMAR VOWEL SIGN E +1032..1037;N # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038;N # Mc 
MYANMAR SIGN VISARGA +1039..103A;N # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C;N # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E;N # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F;N # Lo MYANMAR LETTER GREAT SA +1040..1049;N # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F;N # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055;N # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057;N # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059;N # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D;N # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060;N # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061;N # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064;N # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066;N # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D;N # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070;N # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074;N # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081;N # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082;N # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084;N # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086;N # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C;N # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D;N # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E;N # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F;N # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099;N # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C;N # Mc [3] MYANMAR SIGN KHAMTI 
TONE-1..MYANMAR VOWEL SIGN AITON A +109D;N # Mn MYANMAR VOWEL SIGN AITON AI +109E..109F;N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +10A0..10C5;N # Lu [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7;N # Lu GEORGIAN CAPITAL LETTER YN +10CD;N # Lu GEORGIAN CAPITAL LETTER AEN +10D0..10FA;N # Ll [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB;N # Po GEORGIAN PARAGRAPH SEPARATOR +10FC;N # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF;N # Ll [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..115F;W # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER +1160..11FF;N # Lo [160] HANGUL JUNGSEONG FILLER..HANGUL JONGSEONG SSANGNIEUN +1200..1248;N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +124A..124D;N # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256;N # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258;N # Lo ETHIOPIC SYLLABLE QHWA +125A..125D;N # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288;N # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D;N # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0;N # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5;N # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE;N # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0;N # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5;N # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6;N # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310;N # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315;N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A;N # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F;N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1360..1368;N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C;N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F;N # 
Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399;N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +13A0..13F5;N # Lu [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD;N # Ll [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1400;N # Pd CANADIAN SYLLABICS HYPHEN +1401..166C;N # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D;N # So CANADIAN SYLLABICS CHI SIGN +166E;N # Po CANADIAN SYLLABICS FULL STOP +166F..167F;N # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1680;N # Zs OGHAM SPACE MARK +1681..169A;N # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B;N # Ps OGHAM FEATHER MARK +169C;N # Pe OGHAM REVERSED FEATHER MARK +16A0..16EA;N # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED;N # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0;N # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8;N # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711;N # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714;N # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715;N # Mc TAGALOG SIGN PAMUDPOD +171F;N # Lo TAGALOG LETTER ARCHAIC RA +1720..1731;N # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1732..1733;N # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734;N # Mc HANUNOO SIGN PAMUDPOD +1735..1736;N # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751;N # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753;N # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C;N # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770;N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773;N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3;N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5;N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6;N # Mc KHMER VOWEL SIGN AA +17B7..17BD;N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 
+17BE..17C5;N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6;N # Mn KHMER SIGN NIKAHIT +17C7..17C8;N # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3;N # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D4..17D6;N # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7;N # Lm KHMER SIGN LEK TOO +17D8..17DA;N # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DB;N # Sc KHMER CURRENCY SYMBOL RIEL +17DC;N # Lo KHMER SIGN AVAKRAHASANYA +17DD;N # Mn KHMER SIGN ATTHACAN +17E0..17E9;N # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +17F0..17F9;N # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +1800..1805;N # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806;N # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A;N # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +180B..180D;N # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E;N # Cf MONGOLIAN VOWEL SEPARATOR +180F;N # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819;N # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842;N # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843;N # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878;N # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884;N # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886;N # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8;N # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9;N # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA;N # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5;N # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E;N # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922;N # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926;N # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928;N # Mn [2] LIMBU VOWEL SIGN 
E..LIMBU VOWEL SIGN O +1929..192B;N # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931;N # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932;N # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938;N # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B;N # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1940;N # So LIMBU SIGN LOO +1944..1945;N # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F;N # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D;N # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974;N # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB;N # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9;N # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9;N # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA;N # No NEW TAI LUE THAM DIGIT ONE +19DE..19DF;N # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV +19E0..19FF;N # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC +1A00..1A16;N # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18;N # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A;N # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B;N # Mn BUGINESE VOWEL SIGN AE +1A1E..1A1F;N # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54;N # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55;N # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56;N # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57;N # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E;N # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60;N # Mn TAI THAM SIGN SAKOT +1A61;N # Mc TAI THAM VOWEL SIGN A +1A62;N # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64;N # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C;N # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72;N # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C;N # Mn [10] TAI THAM 
VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F;N # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89;N # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99;N # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6;N # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7;N # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD;N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1AB0..1ABD;N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE;N # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE;N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03;N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04;N # Mc BALINESE SIGN BISAH +1B05..1B33;N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34;N # Mn BALINESE SIGN REREKAN +1B35;N # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A;N # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B;N # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C;N # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41;N # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42;N # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44;N # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C;N # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59;N # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60;N # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A;N # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B6B..1B73;N # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B74..1B7C;N # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7E;N # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B80..1B81;N # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82;N # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0;N # Lo [30] 
SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1;N # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5;N # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7;N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9;N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA;N # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD;N # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF;N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9;N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BBF;N # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1BC0..1BE5;N # Lo [38] BATAK LETTER A..BATAK LETTER U +1BE6;N # Mn BATAK SIGN TOMPI +1BE7;N # Mc BATAK VOWEL SIGN E +1BE8..1BE9;N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC;N # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED;N # Mn BATAK VOWEL SIGN KARO O +1BEE;N # Mc BATAK VOWEL SIGN U +1BEF..1BF1;N # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3;N # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF;N # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23;N # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B;N # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33;N # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35;N # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37;N # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C3B..1C3F;N # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49;N # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F;N # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59;N # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77;N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D;N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F;N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION 
DOUBLE MUCAAD +1C80..1C88;N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA;N # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF;N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7;N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD0..1CD2;N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3;N # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0;N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1;N # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8;N # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CE9..1CEC;N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CED;N # Mn VEDIC SIGN TIRYAK +1CEE..1CF3;N # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4;N # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6;N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7;N # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9;N # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1CFA;N # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B;N # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A;N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77;N # Ll [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78;N # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D7F;N # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE +1D80..1D9A;N # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF;N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF;N # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1E00..1EFF;N # L& [256] LATIN CAPITAL LETTER A WITH RING 
BELOW..LATIN SMALL LETTER Y WITH LOOP +1F00..1F15;N # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D;N # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45;N # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D;N # Lu [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57;N # Ll [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D;N # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4;N # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC;N # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD;N # Sk GREEK KORONIS +1FBE;N # Ll GREEK PROSGEGRAMMENI +1FBF..1FC1;N # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4;N # Ll [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC;N # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF;N # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3;N # Ll [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB;N # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF;N # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC;N # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF;N # Sk [3] GREEK DIALYTIKA AND 
VARIA..GREEK VARIA +1FF2..1FF4;N # Ll [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC;N # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE;N # Sk [2] GREEK OXIA..GREEK DASIA +2000..200A;N # Zs [11] EN QUAD..HAIR SPACE +200B..200F;N # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +2010;A # Pd HYPHEN +2011..2012;N # Pd [2] NON-BREAKING HYPHEN..FIGURE DASH +2013..2015;A # Pd [3] EN DASH..HORIZONTAL BAR +2016;A # Po DOUBLE VERTICAL LINE +2017;N # Po DOUBLE LOW LINE +2018;A # Pi LEFT SINGLE QUOTATION MARK +2019;A # Pf RIGHT SINGLE QUOTATION MARK +201A;N # Ps SINGLE LOW-9 QUOTATION MARK +201B;N # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK +201C;A # Pi LEFT DOUBLE QUOTATION MARK +201D;A # Pf RIGHT DOUBLE QUOTATION MARK +201E;N # Ps DOUBLE LOW-9 QUOTATION MARK +201F;N # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2022;A # Po [3] DAGGER..BULLET +2023;N # Po TRIANGULAR BULLET +2024..2027;A # Po [4] ONE DOT LEADER..HYPHENATION POINT +2028;N # Zl LINE SEPARATOR +2029;N # Zp PARAGRAPH SEPARATOR +202A..202E;N # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +202F;N # Zs NARROW NO-BREAK SPACE +2030;A # Po PER MILLE SIGN +2031;N # Po PER TEN THOUSAND SIGN +2032..2033;A # Po [2] PRIME..DOUBLE PRIME +2034;N # Po TRIPLE PRIME +2035;A # Po REVERSED PRIME +2036..2038;N # Po [3] REVERSED DOUBLE PRIME..CARET +2039;N # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A;N # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B;A # Po REFERENCE MARK +203C..203D;N # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +203E;A # Po OVERLINE +203F..2040;N # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043;N # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044;N # Sm FRACTION SLASH +2045;N # Ps LEFT SQUARE BRACKET WITH QUILL +2046;N # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051;N # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052;N # Sm COMMERCIAL 
MINUS SIGN +2053;N # Po SWUNG DASH +2054;N # Pc INVERTED UNDERTIE +2055..205E;N # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F;N # Zs MEDIUM MATHEMATICAL SPACE +2060..2064;N # Cf [5] WORD JOINER..INVISIBLE PLUS +2066..206F;N # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +2070;N # No SUPERSCRIPT ZERO +2071;N # Lm SUPERSCRIPT LATIN SMALL LETTER I +2074;A # No SUPERSCRIPT FOUR +2075..2079;N # No [5] SUPERSCRIPT FIVE..SUPERSCRIPT NINE +207A..207C;N # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D;N # Ps SUPERSCRIPT LEFT PARENTHESIS +207E;N # Pe SUPERSCRIPT RIGHT PARENTHESIS +207F;A # Lm SUPERSCRIPT LATIN SMALL LETTER N +2080;N # No SUBSCRIPT ZERO +2081..2084;A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR +2085..2089;N # No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE +208A..208C;N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D;N # Ps SUBSCRIPT LEFT PARENTHESIS +208E;N # Pe SUBSCRIPT RIGHT PARENTHESIS +2090..209C;N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20A0..20A8;N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN +20A9;H # Sc WON SIGN +20AA..20AB;N # Sc [2] NEW SHEQEL SIGN..DONG SIGN +20AC;A # Sc EURO SIGN +20AD..20C0;N # Sc [20] KIP SIGN..SOM SIGN +20D0..20DC;N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0;N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1;N # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4;N # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0;N # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2100..2101;N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102;N # Lu DOUBLE-STRUCK CAPITAL C +2103;A # So DEGREE CELSIUS +2104;N # So CENTRE LINE SYMBOL +2105;A # So CARE OF +2106;N # So CADA UNA +2107;N # Lu EULER CONSTANT +2108;N # So SCRUPLE +2109;A # So DEGREE FAHRENHEIT +210A..2112;N # L& [9] SCRIPT SMALL G..SCRIPT CAPITAL L +2113;A # Ll SCRIPT SMALL L +2114;N # So L B BAR 
SYMBOL +2115;N # Lu DOUBLE-STRUCK CAPITAL N +2116;A # So NUMERO SIGN +2117;N # So SOUND RECORDING COPYRIGHT +2118;N # Sm SCRIPT CAPITAL P +2119..211D;N # Lu [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2120;N # So [3] PRESCRIPTION TAKE..SERVICE MARK +2121..2122;A # So [2] TELEPHONE SIGN..TRADE MARK SIGN +2123;N # So VERSICLE +2124;N # Lu DOUBLE-STRUCK CAPITAL Z +2125;N # So OUNCE SIGN +2126;A # Lu OHM SIGN +2127;N # So INVERTED OHM SIGN +2128;N # Lu BLACK-LETTER CAPITAL Z +2129;N # So TURNED GREEK SMALL LETTER IOTA +212A;N # Lu KELVIN SIGN +212B;A # Lu ANGSTROM SIGN +212C..212D;N # Lu [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212E;N # So ESTIMATED SYMBOL +212F..2134;N # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138;N # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139;N # Ll INFORMATION SOURCE +213A..213B;N # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F;N # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144;N # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149;N # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A;N # So PROPERTY LINE +214B;N # Sm TURNED AMPERSAND +214C..214D;N # So [2] PER SIGN..AKTIESELSKAB +214E;N # Ll TURNED SMALL F +214F;N # So SYMBOL FOR SAMARITAN SOURCE +2150..2152;N # No [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH +2153..2154;A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS +2155..215A;N # No [6] VULGAR FRACTION ONE FIFTH..VULGAR FRACTION FIVE SIXTHS +215B..215E;A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS +215F;N # No FRACTION NUMERATOR ONE +2160..216B;A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE +216C..216F;N # Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND +2170..2179;A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN +217A..2182;N # Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND +2183..2184;N # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER 
REVERSED C +2185..2188;N # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2189;A # No VULGAR FRACTION ZERO THIRDS +218A..218B;N # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194;A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199;A # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B;N # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F;N # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0;N # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2;N # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3;N # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5;N # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6;N # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD;N # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE;N # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21B7;N # So [9] DOWNWARDS ZIGZAG ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21B8..21B9;A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR +21BA..21CD;N # So [20] ANTICLOCKWISE OPEN CIRCLE ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF;N # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1;N # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2;A # Sm RIGHTWARDS DOUBLE ARROW +21D3;N # So DOWNWARDS DOUBLE ARROW +21D4;A # Sm LEFT RIGHT DOUBLE ARROW +21D5..21E6;N # So [18] UP DOWN DOUBLE ARROW..LEFTWARDS WHITE ARROW +21E7;A # So UPWARDS WHITE ARROW +21E8..21F3;N # So [12] RIGHTWARDS WHITE ARROW..UP DOWN WHITE ARROW +21F4..21FF;N # Sm [12] RIGHT ARROW WITH SMALL CIRCLE..LEFT RIGHT OPEN-HEADED ARROW +2200;A # Sm FOR ALL +2201;N # Sm COMPLEMENT +2202..2203;A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS +2204..2206;N # Sm [3] THERE DOES NOT EXIST..INCREMENT +2207..2208;A # Sm [2] NABLA..ELEMENT OF +2209..220A;N # Sm [2] NOT AN ELEMENT OF..SMALL ELEMENT OF +220B;A # Sm CONTAINS AS MEMBER +220C..220E;N # Sm [3] DOES NOT CONTAIN AS MEMBER..END OF PROOF 
+220F;A # Sm N-ARY PRODUCT +2210;N # Sm N-ARY COPRODUCT +2211;A # Sm N-ARY SUMMATION +2212..2214;N # Sm [3] MINUS SIGN..DOT PLUS +2215;A # Sm DIVISION SLASH +2216..2219;N # Sm [4] SET MINUS..BULLET OPERATOR +221A;A # Sm SQUARE ROOT +221B..221C;N # Sm [2] CUBE ROOT..FOURTH ROOT +221D..2220;A # Sm [4] PROPORTIONAL TO..ANGLE +2221..2222;N # Sm [2] MEASURED ANGLE..SPHERICAL ANGLE +2223;A # Sm DIVIDES +2224;N # Sm DOES NOT DIVIDE +2225;A # Sm PARALLEL TO +2226;N # Sm NOT PARALLEL TO +2227..222C;A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL +222D;N # Sm TRIPLE INTEGRAL +222E;A # Sm CONTOUR INTEGRAL +222F..2233;N # Sm [5] SURFACE INTEGRAL..ANTICLOCKWISE CONTOUR INTEGRAL +2234..2237;A # Sm [4] THEREFORE..PROPORTION +2238..223B;N # Sm [4] DOT MINUS..HOMOTHETIC +223C..223D;A # Sm [2] TILDE OPERATOR..REVERSED TILDE +223E..2247;N # Sm [10] INVERTED LAZY S..NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +2248;A # Sm ALMOST EQUAL TO +2249..224B;N # Sm [3] NOT ALMOST EQUAL TO..TRIPLE TILDE +224C;A # Sm ALL EQUAL TO +224D..2251;N # Sm [5] EQUIVALENT TO..GEOMETRICALLY EQUAL TO +2252;A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253..225F;N # Sm [13] IMAGE OF OR APPROXIMATELY EQUAL TO..QUESTIONED EQUAL TO +2260..2261;A # Sm [2] NOT EQUAL TO..IDENTICAL TO +2262..2263;N # Sm [2] NOT IDENTICAL TO..STRICTLY EQUIVALENT TO +2264..2267;A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO +2268..2269;N # Sm [2] LESS-THAN BUT NOT EQUAL TO..GREATER-THAN BUT NOT EQUAL TO +226A..226B;A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN +226C..226D;N # Sm [2] BETWEEN..NOT EQUIVALENT TO +226E..226F;A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN +2270..2281;N # Sm [18] NEITHER LESS-THAN NOR EQUAL TO..DOES NOT SUCCEED +2282..2283;A # Sm [2] SUBSET OF..SUPERSET OF +2284..2285;N # Sm [2] NOT A SUBSET OF..NOT A SUPERSET OF +2286..2287;A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO +2288..2294;N # Sm [13] NEITHER A SUBSET OF NOR EQUAL TO..SQUARE CUP +2295;A # Sm CIRCLED PLUS +2296..2298;N # Sm [3] 
CIRCLED MINUS..CIRCLED DIVISION SLASH +2299;A # Sm CIRCLED DOT OPERATOR +229A..22A4;N # Sm [11] CIRCLED RING OPERATOR..DOWN TACK +22A5;A # Sm UP TACK +22A6..22BE;N # Sm [25] ASSERTION..RIGHT ANGLE WITH ARC +22BF;A # Sm RIGHT TRIANGLE +22C0..22FF;N # Sm [64] N-ARY LOGICAL AND..Z NOTATION BAG MEMBERSHIP +2300..2307;N # So [8] DIAMETER SIGN..WAVY LINE +2308;N # Ps LEFT CEILING +2309;N # Pe RIGHT CEILING +230A;N # Ps LEFT FLOOR +230B;N # Pe RIGHT FLOOR +230C..2311;N # So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE +2312;A # So ARC +2313..2319;N # So [7] SEGMENT..TURNED NOT SIGN +231A..231B;W # So [2] WATCH..HOURGLASS +231C..231F;N # So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER +2320..2321;N # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328;N # So [7] FROWN..KEYBOARD +2329;W # Ps LEFT-POINTING ANGLE BRACKET +232A;W # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B;N # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C;N # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A;N # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3;N # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB;N # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1;N # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..23E8;N # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL +23E9..23EC;W # So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE +23ED..23EF;N # So [3] BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR +23F0;W # So ALARM CLOCK +23F1..23F2;N # So [2] STOPWATCH..TIMER CLOCK +23F3;W # So HOURGLASS WITH FLOWING SAND +23F4..23FF;N # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL +2400..2426;N # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A;N # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B;A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9;A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z 
+24EA;N # No CIRCLED DIGIT ZERO +24EB..24FF;A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO +2500..254B;A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL +254C..254F;N # So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL +2550..2573;A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS +2574..257F;N # So [12] BOX DRAWINGS LIGHT LEFT..BOX DRAWINGS HEAVY UP AND LIGHT DOWN +2580..258F;A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK +2590..2591;N # So [2] RIGHT HALF BLOCK..LIGHT SHADE +2592..2595;A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK +2596..259F;N # So [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT +25A0..25A1;A # So [2] BLACK SQUARE..WHITE SQUARE +25A2;N # So WHITE SQUARE WITH ROUNDED CORNERS +25A3..25A9;A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL +25AA..25B1;N # So [8] BLACK SMALL SQUARE..WHITE PARALLELOGRAM +25B2..25B3;A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE +25B4..25B5;N # So [2] BLACK UP-POINTING SMALL TRIANGLE..WHITE UP-POINTING SMALL TRIANGLE +25B6;A # So BLACK RIGHT-POINTING TRIANGLE +25B7;A # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25BB;N # So [4] BLACK RIGHT-POINTING SMALL TRIANGLE..WHITE RIGHT-POINTING POINTER +25BC..25BD;A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE +25BE..25BF;N # So [2] BLACK DOWN-POINTING SMALL TRIANGLE..WHITE DOWN-POINTING SMALL TRIANGLE +25C0;A # So BLACK LEFT-POINTING TRIANGLE +25C1;A # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25C5;N # So [4] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE LEFT-POINTING POINTER +25C6..25C8;A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND +25C9..25CA;N # So [2] FISHEYE..LOZENGE +25CB;A # So WHITE CIRCLE +25CC..25CD;N # So [2] DOTTED CIRCLE..CIRCLE WITH VERTICAL FILL +25CE..25D1;A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK 
+25D2..25E1;N # So [16] CIRCLE WITH LOWER HALF BLACK..LOWER HALF CIRCLE +25E2..25E5;A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE +25E6..25EE;N # So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK +25EF;A # So LARGE CIRCLE +25F0..25F7;N # So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FC;N # Sm [5] UPPER LEFT TRIANGLE..BLACK MEDIUM SQUARE +25FD..25FE;W # Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE +25FF;N # Sm LOWER RIGHT TRIANGLE +2600..2604;N # So [5] BLACK SUN WITH RAYS..COMET +2605..2606;A # So [2] BLACK STAR..WHITE STAR +2607..2608;N # So [2] LIGHTNING..THUNDERSTORM +2609;A # So SUN +260A..260D;N # So [4] ASCENDING NODE..OPPOSITION +260E..260F;A # So [2] BLACK TELEPHONE..WHITE TELEPHONE +2610..2613;N # So [4] BALLOT BOX..SALTIRE +2614..2615;W # So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE +2616..261B;N # So [6] WHITE SHOGI PIECE..BLACK RIGHT POINTING INDEX +261C;A # So WHITE LEFT POINTING INDEX +261D;N # So WHITE UP POINTING INDEX +261E;A # So WHITE RIGHT POINTING INDEX +261F..263F;N # So [33] WHITE DOWN POINTING INDEX..MERCURY +2640;A # So FEMALE SIGN +2641;N # So EARTH +2642;A # So MALE SIGN +2643..2647;N # So [5] JUPITER..PLUTO +2648..2653;W # So [12] ARIES..PISCES +2654..265F;N # So [12] WHITE CHESS KING..BLACK CHESS PAWN +2660..2661;A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT +2662;N # So WHITE DIAMOND SUIT +2663..2665;A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT +2666;N # So BLACK DIAMOND SUIT +2667..266A;A # So [4] WHITE CLUB SUIT..EIGHTH NOTE +266B;N # So BEAMED EIGHTH NOTES +266C..266D;A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN +266E;N # So MUSIC NATURAL SIGN +266F;A # Sm MUSIC SHARP SIGN +2670..267E;N # So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN +267F;W # So WHEELCHAIR SYMBOL +2680..2692;N # So [19] DIE FACE-1..HAMMER AND PICK +2693;W # So ANCHOR +2694..269D;N # So [10] CROSSED SWORDS..OUTLINED WHITE STAR +269E..269F;A # So [2] THREE LINES 
CONVERGING RIGHT..THREE LINES CONVERGING LEFT +26A0;N # So WARNING SIGN +26A1;W # So HIGH VOLTAGE SIGN +26A2..26A9;N # So [8] DOUBLED FEMALE SIGN..HORIZONTAL MALE WITH STROKE SIGN +26AA..26AB;W # So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE +26AC..26BC;N # So [17] MEDIUM SMALL WHITE CIRCLE..SESQUIQUADRATE +26BD..26BE;W # So [2] SOCCER BALL..BASEBALL +26BF;A # So SQUARED KEY +26C0..26C3;N # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING +26C4..26C5;W # So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD +26C6..26CD;A # So [8] RAIN..DISABLED CAR +26CE;W # So OPHIUCHUS +26CF..26D3;A # So [5] PICK..CHAINS +26D4;W # So NO ENTRY +26D5..26E1;A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2 +26E2;N # So ASTRONOMICAL SYMBOL FOR URANUS +26E3;A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE +26E4..26E7;N # So [4] PENTAGRAM..INVERTED PENTAGRAM +26E8..26E9;A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE +26EA;W # So CHURCH +26EB..26F1;A # So [7] CASTLE..UMBRELLA ON GROUND +26F2..26F3;W # So [2] FOUNTAIN..FLAG IN HOLE +26F4;A # So FERRY +26F5;W # So SAILBOAT +26F6..26F9;A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL +26FA;W # So TENT +26FB..26FC;A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL +26FD;W # So FUEL PUMP +26FE..26FF;A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE +2700..2704;N # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS +2705;W # So WHITE HEAVY CHECK MARK +2706..2709;N # So [4] TELEPHONE LOCATION SIGN..ENVELOPE +270A..270B;W # So [2] RAISED FIST..RAISED HAND +270C..2727;N # So [28] VICTORY HAND..WHITE FOUR POINTED STAR +2728;W # So SPARKLES +2729..273C;N # So [20] STRESS OUTLINED WHITE STAR..OPEN CENTRE TEARDROP-SPOKED ASTERISK +273D;A # So HEAVY TEARDROP-SPOKED ASTERISK +273E..274B;N # So [14] SIX PETALLED BLACK AND WHITE FLORETTE..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK +274C;W # So CROSS MARK +274D;N # So SHADOWED WHITE CIRCLE +274E;W # So NEGATIVE SQUARED CROSS MARK +274F..2752;N # So [4] 
LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE +2753..2755;W # So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT +2756;N # So BLACK DIAMOND MINUS WHITE X +2757;W # So HEAVY EXCLAMATION MARK SYMBOL +2758..2767;N # So [16] LIGHT VERTICAL BAR..ROTATED FLORAL HEART BULLET +2768;N # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769;N # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A;N # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B;N # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C;N # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D;N # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E;N # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F;N # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770;N # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771;N # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772;N # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773;N # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774;N # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775;N # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..277F;A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN +2780..2793;N # No [20] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794;N # So HEAVY WIDE-HEADED RIGHTWARDS ARROW +2795..2797;W # So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN +2798..27AF;N # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW +27B0;W # So CURLY LOOP +27B1..27BE;N # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW +27BF;W # So DOUBLE CURLY LOOP +27C0..27C4;N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5;N # Ps LEFT S-SHAPED BAG DELIMITER +27C6;N # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5;N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6;Na # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7;Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 
+27E8;Na # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9;Na # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA;Na # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB;Na # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC;Na # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED;Na # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE;N # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF;N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF;N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF;N # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..297F;N # Sm [128] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..DOWN FISH TAIL +2980..2982;N # Sm [3] TRIPLE VERTICAL BAR DELIMITER..Z NOTATION TYPE COLON +2983;N # Ps LEFT WHITE CURLY BRACKET +2984;N # Pe RIGHT WHITE CURLY BRACKET +2985;Na # Ps LEFT WHITE PARENTHESIS +2986;Na # Pe RIGHT WHITE PARENTHESIS +2987;N # Ps Z NOTATION LEFT IMAGE BRACKET +2988;N # Pe Z NOTATION RIGHT IMAGE BRACKET +2989;N # Ps Z NOTATION LEFT BINDING BRACKET +298A;N # Pe Z NOTATION RIGHT BINDING BRACKET +298B;N # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C;N # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D;N # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E;N # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F;N # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990;N # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991;N # Ps LEFT ANGLE BRACKET WITH DOT +2992;N # Pe RIGHT ANGLE BRACKET WITH DOT +2993;N # Ps LEFT ARC LESS-THAN BRACKET +2994;N # Pe RIGHT ARC GREATER-THAN BRACKET +2995;N # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996;N # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997;N # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998;N # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7;N # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8;N # Ps LEFT WIGGLY FENCE +29D9;N # Pe RIGHT WIGGLY FENCE +29DA;N # Ps LEFT DOUBLE WIGGLY FENCE +29DB;N # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB;N # Sm [32] 
INCOMPLETE INFINITY..TRIPLE PLUS +29FC;N # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD;N # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..29FF;N # Sm [2] TINY..MINY +2A00..2AFF;N # Sm [256] N-ARY CIRCLED DOT OPERATOR..N-ARY WHITE VERTICAL BAR +2B00..2B1A;N # So [27] NORTH EAST WHITE ARROW..DOTTED SQUARE +2B1B..2B1C;W # So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE +2B1D..2B2F;N # So [19] BLACK VERY SMALL SQUARE..WHITE VERTICAL ELLIPSE +2B30..2B44;N # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46;N # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C;N # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B4F;N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW +2B50;W # So WHITE MEDIUM STAR +2B51..2B54;N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON +2B55;W # So HEAVY LARGE CIRCLE +2B56..2B59;A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE +2B5A..2B73;N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95;N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF;N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2C00..2C5F;N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C60..2C7B;N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D;N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2C7F;N # Lu [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80..2CE4;N # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI +2CE5..2CEA;N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE;N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1;N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 
+2CF2..2CF3;N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2CF9..2CFC;N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD;N # No COPTIC FRACTION ONE HALF +2CFE..2CFF;N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2D00..2D25;N # Ll [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27;N # Ll GEORGIAN SMALL LETTER YN +2D2D;N # Ll GEORGIAN SMALL LETTER AEN +2D30..2D67;N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F;N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70;N # Po TIFINAGH SEPARATOR MARK +2D7F;N # Mn TIFINAGH CONSONANT JOINER +2D80..2D96;N # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6;N # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE;N # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6;N # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE;N # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6;N # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE;N # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6;N # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE;N # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF;N # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E00..2E01;N # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02;N # Pi LEFT SUBSTITUTION BRACKET +2E03;N # Pf RIGHT SUBSTITUTION BRACKET +2E04;N # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05;N # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08;N # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09;N # Pi LEFT TRANSPOSITION BRACKET +2E0A;N # Pf RIGHT TRANSPOSITION BRACKET +2E0B;N # Po RAISED SQUARE +2E0C;N # Pi LEFT RAISED OMISSION BRACKET +2E0D;N # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16;N # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17;N # Pd DOUBLE OBLIQUE HYPHEN 
+2E18..2E19;N # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A;N # Pd HYPHEN WITH DIAERESIS +2E1B;N # Po TILDE WITH RING ABOVE +2E1C;N # Pi LEFT LOW PARAPHRASE BRACKET +2E1D;N # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F;N # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20;N # Pi LEFT VERTICAL BAR WITH QUILL +2E21;N # Pf RIGHT VERTICAL BAR WITH QUILL +2E22;N # Ps TOP LEFT HALF BRACKET +2E23;N # Pe TOP RIGHT HALF BRACKET +2E24;N # Ps BOTTOM LEFT HALF BRACKET +2E25;N # Pe BOTTOM RIGHT HALF BRACKET +2E26;N # Ps LEFT SIDEWAYS U BRACKET +2E27;N # Pe RIGHT SIDEWAYS U BRACKET +2E28;N # Ps LEFT DOUBLE PARENTHESIS +2E29;N # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E;N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F;N # Lm VERTICAL TILDE +2E30..2E39;N # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B;N # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F;N # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40;N # Pd DOUBLE HYPHEN +2E41;N # Po REVERSED COMMA +2E42;N # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F;N # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51;N # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54;N # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55;N # Ps LEFT SQUARE BRACKET WITH STROKE +2E56;N # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57;N # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58;N # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59;N # Ps TOP HALF LEFT PARENTHESIS +2E5A;N # Pe TOP HALF RIGHT PARENTHESIS +2E5B;N # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C;N # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D;N # Pd OBLIQUE HYPHEN +2E80..2E99;W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3;W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5;W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFB;W # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3000;F # Zs IDEOGRAPHIC 
SPACE +3001..3003;W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004;W # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3005;W # Lm IDEOGRAPHIC ITERATION MARK +3006;W # Lo IDEOGRAPHIC CLOSING MARK +3007;W # Nl IDEOGRAPHIC NUMBER ZERO +3008;W # Ps LEFT ANGLE BRACKET +3009;W # Pe RIGHT ANGLE BRACKET +300A;W # Ps LEFT DOUBLE ANGLE BRACKET +300B;W # Pe RIGHT DOUBLE ANGLE BRACKET +300C;W # Ps LEFT CORNER BRACKET +300D;W # Pe RIGHT CORNER BRACKET +300E;W # Ps LEFT WHITE CORNER BRACKET +300F;W # Pe RIGHT WHITE CORNER BRACKET +3010;W # Ps LEFT BLACK LENTICULAR BRACKET +3011;W # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013;W # So [2] POSTAL MARK..GETA MARK +3014;W # Ps LEFT TORTOISE SHELL BRACKET +3015;W # Pe RIGHT TORTOISE SHELL BRACKET +3016;W # Ps LEFT WHITE LENTICULAR BRACKET +3017;W # Pe RIGHT WHITE LENTICULAR BRACKET +3018;W # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019;W # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A;W # Ps LEFT WHITE SQUARE BRACKET +301B;W # Pe RIGHT WHITE SQUARE BRACKET +301C;W # Pd WAVE DASH +301D;W # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F;W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020;W # So POSTAL MARK FACE +3021..3029;W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D;W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F;W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3030;W # Pd WAVY DASH +3031..3035;W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037;W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +3038..303A;W # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B;W # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C;W # Lo MASU MARK +303D;W # Po PART ALTERNATION MARK +303E;W # So IDEOGRAPHIC VARIATION INDICATOR +303F;N # So IDEOGRAPHIC HALF FILL SPACE +3041..3096;W # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A;W # Mn [2] COMBINING KATAKANA-HIRAGANA 
VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C;W # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E;W # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F;W # Lo HIRAGANA DIGRAPH YORI +30A0;W # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA;W # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB;W # Po KATAKANA MIDDLE DOT +30FC..30FE;W # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF;W # Lo KATAKANA DIGRAPH KOTO +3105..312F;W # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E;W # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191;W # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195;W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F;W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF;W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31C0..31E3;W # So [36] CJK STROKE T..CJK STROKE Q +31F0..31FF;W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321E;W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3220..3229;W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247;W # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F;A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250;W # So PARTNERSHIP SIGN +3251..325F;W # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +3260..327F;W # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL +3280..3289;W # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0;W # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF;W # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..32FF;W # So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA +3300..33FF;W # So [256] 
SQUARE APAATO..SQUARE GAL +3400..4DBF;W # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4DC0..4DFF;N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +4E00..9FFF;W # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +A000..A014;W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015;W # Lm YI SYLLABLE WU +A016..A48C;W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6;W # So [55] YI RADICAL QOT..YI RADICAL KE +A4D0..A4F7;N # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD;N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF;N # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B;N # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C;N # Lm VAI SYLLABLE LENGTHENER +A60D..A60F;N # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F;N # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629;N # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B;N # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D;N # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E;N # Lo CYRILLIC LETTER MULTIOCULAR O +A66F;N # Mn COMBINING CYRILLIC VZMET +A670..A672;N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A673;N # Po SLAVONIC ASTERISK +A674..A67D;N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67E;N # Po CYRILLIC KAVYKA +A67F;N # Lm CYRILLIC PAYEROK +A680..A69B;N # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D;N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F;N # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5;N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF;N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1;N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A6F2..A6F7;N # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK 
+A700..A716;N # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F;N # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721;N # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A722..A76F;N # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770;N # Lm MODIFIER LETTER US +A771..A787;N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788;N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A;N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E;N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F;N # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA;N # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1;N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3;N # Ll LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9;N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4;N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6;N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7;N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9;N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA;N # Ll LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A7FF;N # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M +A800..A801;N # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A802;N # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805;N # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806;N # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A;N # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B;N # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822;N # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824;N # Mc [2] 
SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826;N # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827;N # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B;N # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A82C;N # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A830..A835;N # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837;N # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838;N # Sc NORTH INDIC RUPEE MARK +A839;N # So NORTH INDIC QUANTITY MARK +A840..A873;N # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877;N # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +A880..A881;N # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3;N # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3;N # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5;N # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8CE..A8CF;N # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9;N # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1;N # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7;N # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA;N # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB;N # Lo DEVANAGARI HEADSTROKE +A8FC;N # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE;N # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF;N # Mn DEVANAGARI VOWEL SIGN AY +A900..A909;N # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925;N # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D;N # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A92E..A92F;N # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946;N # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951;N # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953;N # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA 
+A95F;N # Po REJANG SECTION MARK +A960..A97C;W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982;N # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983;N # Mc JAVANESE SIGN WIGNYAN +A984..A9B2;N # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3;N # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5;N # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9;N # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB;N # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD;N # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0;N # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD;N # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF;N # Lm JAVANESE PANGRANGKEP +A9D0..A9D9;N # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF;N # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4;N # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5;N # Mn MYANMAR SIGN SHAN SAW +A9E6;N # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF;N # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9;N # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE;N # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28;N # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E;N # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30;N # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32;N # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34;N # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36;N # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42;N # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43;N # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B;N # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C;N # Mn CHAM CONSONANT SIGN FINAL M +AA4D;N # Mc CHAM 
CONSONANT SIGN FINAL H +AA50..AA59;N # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F;N # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F;N # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70;N # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76;N # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79;N # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A;N # Lo MYANMAR LETTER AITON RA +AA7B;N # Mc MYANMAR SIGN PAO KAREN TONE +AA7C;N # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D;N # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AA7F;N # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA +AA80..AAAF;N # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O +AAB0;N # Mn TAI VIET MAI KANG +AAB1;N # Lo TAI VIET VOWEL AA +AAB2..AAB4;N # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6;N # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8;N # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD;N # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF;N # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0;N # Lo TAI VIET TONE MAI NUENG +AAC1;N # Mn TAI VIET TONE MAI THO +AAC2;N # Lo TAI VIET TONE MAI SONG +AADB..AADC;N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD;N # Lm TAI VIET SYMBOL SAM +AADE..AADF;N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA;N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB;N # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED;N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF;N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1;N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2;N # Lo MEETEI MAYEK ANJI +AAF3..AAF4;N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5;N # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6;N # Mn MEETEI MAYEK VIRAMA +AB01..AB06;N # Lo [6] ETHIOPIC SYLLABLE 
TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E;N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16;N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26;N # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E;N # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A;N # Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B;N # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F;N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68;N # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69;N # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B;N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB70..ABBF;N # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2;N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4;N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5;N # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7;N # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8;N # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA;N # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB;N # Po MEETEI MAYEK CHEIKHEI +ABEC;N # Mc MEETEI MAYEK LUM IYEK +ABED;N # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9;N # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3;W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6;N # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB;N # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +D800..DB7F;N # Cs [896] .. +DB80..DBFF;N # Cs [128] .. +DC00..DFFF;N # Cs [1024] .. +E000..F8FF;A # Co [6400] .. +F900..FA6D;W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA6E..FA6F;W # Cn [2] .. 
+FA70..FAD9;W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FADA..FAFF;W # Cn [38] .. +FB00..FB06;N # Ll [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17;N # Ll [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D;N # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E;N # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28;N # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29;N # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36;N # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C;N # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E;N # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41;N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44;N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F;N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED +FB50..FBB1;N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2;N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D;N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD3E;N # Pe ORNATE LEFT PARENTHESIS +FD3F;N # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F;N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD50..FD8F;N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7;N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDCF;N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDF0..FDFB;N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC;N # Sc RIAL SIGN +FDFD..FDFF;N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL 
+FE00..FE0F;A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE10..FE16;W # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17;W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19;W # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE20..FE2F;N # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE30;W # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32;W # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34;W # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35;W # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37;W # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39;W # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B;W # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D;W # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F;W # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41;W # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43;W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46;W # Po [2] SESAME DOT..WHITE SESAME DOT +FE47;W # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT 
SQUARE BRACKET +FE49..FE4C;W # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F;W # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52;W # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57;W # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58;W # Pd SMALL EM DASH +FE59;W # Ps SMALL LEFT PARENTHESIS +FE5A;W # Pe SMALL RIGHT PARENTHESIS +FE5B;W # Ps SMALL LEFT CURLY BRACKET +FE5C;W # Pe SMALL RIGHT CURLY BRACKET +FE5D;W # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E;W # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61;W # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62;W # Sm SMALL PLUS SIGN +FE63;W # Pd SMALL HYPHEN-MINUS +FE64..FE66;W # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68;W # Po SMALL REVERSE SOLIDUS +FE69;W # Sc SMALL DOLLAR SIGN +FE6A..FE6B;W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FE70..FE74;N # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC;N # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FEFF;N # Cf ZERO WIDTH NO-BREAK SPACE +FF01..FF03;F # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04;F # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07;F # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08;F # Ps FULLWIDTH LEFT PARENTHESIS +FF09;F # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A;F # Po FULLWIDTH ASTERISK +FF0B;F # Sm FULLWIDTH PLUS SIGN +FF0C;F # Po FULLWIDTH COMMA +FF0D;F # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F;F # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19;F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B;F # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E;F # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20;F # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF21..FF3A;F # Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3B;F # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C;F # Po FULLWIDTH REVERSE SOLIDUS +FF3D;F # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E;F # Sk 
FULLWIDTH CIRCUMFLEX ACCENT +FF3F;F # Pc FULLWIDTH LOW LINE +FF40;F # Sk FULLWIDTH GRAVE ACCENT +FF41..FF5A;F # Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF5B;F # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C;F # Sm FULLWIDTH VERTICAL LINE +FF5D;F # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E;F # Sm FULLWIDTH TILDE +FF5F;F # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60;F # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61;H # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62;H # Ps HALFWIDTH LEFT CORNER BRACKET +FF63;H # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65;H # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F;H # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70;H # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D;H # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F;H # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE;H # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7;H # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF;H # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7;H # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC;H # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +FFE0..FFE1;F # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2;F # Sm FULLWIDTH NOT SIGN +FFE3;F # Sk FULLWIDTH MACRON +FFE4;F # So FULLWIDTH BROKEN BAR +FFE5..FFE6;F # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8;H # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC;H # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE;H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB;N # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC;N # So OBJECT REPLACEMENT CHARACTER +FFFD;A # So REPLACEMENT CHARACTER +10000..1000B;N # Lo [12] LINEAR B SYLLABLE B008 
A..LINEAR B SYLLABLE B046 JE +1000D..10026;N # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A;N # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D;N # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D;N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D;N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA;N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100..10102;N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133;N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F;N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10140..10174;N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178;N # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189;N # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B;N # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C..1018E;N # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN +10190..1019C;N # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0;N # So GREEK SYMBOL TAU RHO +101D0..101FC;N # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +101FD;N # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +10280..1029C;N # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0;N # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0;N # Mn COPTIC EPACT THOUSANDS MARK +102E1..102FB;N # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +10300..1031F;N # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323;N # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..1032F;N # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE +10330..10340;N # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341;N # Nl GOTHIC LETTER NINETY +10342..10349;N # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A;N # Nl GOTHIC LETTER NINE 
HUNDRED +10350..10375;N # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A;N # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D;N # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F;N # Po UGARITIC WORD DIVIDER +103A0..103C3;N # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF;N # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0;N # Po OLD PERSIAN WORD DIVIDER +103D1..103D5;N # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F;N # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1047F;N # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW +10480..1049D;N # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO +104A0..104A9;N # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3;N # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB;N # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527;N # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563;N # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F;N # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A;N # Lu [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A;N # Lu [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592;N # Lu [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595;N # Lu [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1;N # Ll [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1;N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9;N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC;N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736;N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755;N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767;N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785;N # Lm [6] 
MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0;N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA;N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805;N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808;N # Lo CYPRIOT SYLLABLE JO +1080A..10835;N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838;N # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C;N # Lo CYPRIOT SYLLABLE ZA +1083F;N # Lo CYPRIOT SYLLABLE ZO +10840..10855;N # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW +10857;N # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F;N # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876;N # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878;N # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F;N # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E;N # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108A7..108AF;N # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108E0..108F2;N # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5;N # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF;N # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915;N # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B;N # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F;N # Po PHOENICIAN WORD SEPARATOR +10920..10939;N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F;N # Po LYDIAN TRIANGULAR MARK +10980..1099F;N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0..109B7;N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD;N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF;N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE 
LOGOGRAM IMN +109C0..109CF;N # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF;N # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00;N # Lo KHAROSHTHI LETTER A +10A01..10A03;N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06;N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F;N # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13;N # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17;N # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35;N # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A;N # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F;N # Mn KHAROSHTHI VIRAMA +10A40..10A48;N # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A50..10A58;N # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A60..10A7C;N # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E;N # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F;N # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C;N # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F;N # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AC0..10AC7;N # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8;N # So MANICHAEAN SIGN UD +10AC9..10AE4;N # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6;N # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10AEB..10AEF;N # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6;N # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B00..10B35;N # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F;N # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B40..10B55;N # Lo [22] INSCRIPTIONAL PARTHIAN LETTER 
ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F;N # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72;N # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F;N # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91;N # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C;N # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF;N # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10C00..10C48;N # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2;N # Lu [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2;N # Ll [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF;N # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D00..10D23;N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27;N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39;N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E60..10E7E;N # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10E80..10EA9;N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC;N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EAD;N # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1;N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EFD..10EFF;N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10F00..10F1C;N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26;N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27;N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45;N # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50;N # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING 
STROKE BELOW +10F51..10F54;N # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59;N # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F70..10F81;N # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85;N # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10F86..10F89;N # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10FB0..10FC4;N # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB;N # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FE0..10FF6;N # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000;N # Mc BRAHMI SIGN CANDRABINDU +11001;N # Mn BRAHMI SIGN ANUSVARA +11002;N # Mc BRAHMI SIGN VISARGA +11003..11037;N # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046;N # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11047..1104D;N # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11052..11065;N # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11066..1106F;N # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070;N # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072;N # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074;N # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075;N # Lo BRAHMI LETTER OLD TAMIL LLA +1107F;N # Mn BRAHMI NUMBER JOINER +11080..11081;N # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082;N # Mc KAITHI SIGN VISARGA +11083..110AF;N # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2;N # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6;N # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8;N # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA;N # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BB..110BC;N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD;N # Cf KAITHI NUMBER SIGN +110BE..110C1;N # Po [4] KAITHI 
SECTION MARK..KAITHI DOUBLE DANDA +110C2;N # Mn KAITHI VOWEL SIGN VOCALIC R +110CD;N # Cf KAITHI NUMBER SIGN ABOVE +110D0..110E8;N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9;N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102;N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126;N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B;N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C;N # Mc CHAKMA VOWEL SIGN E +1112D..11134;N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F;N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143;N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144;N # Lo CHAKMA LETTER LHAA +11145..11146;N # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147;N # Lo CHAKMA LETTER VAA +11150..11172;N # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173;N # Mn MAHAJANI SIGN NUKTA +11174..11175;N # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176;N # Lo MAHAJANI LIGATURE SHRI +11180..11181;N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182;N # Mc SHARADA SIGN VISARGA +11183..111B2;N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5;N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE;N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0;N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4;N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8;N # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111C9..111CC;N # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CD;N # Po SHARADA SUTRA MARK +111CE;N # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF;N # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9;N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA;N # Lo SHARADA EKAM +111DB;N # Po SHARADA SIGN SIDDHAM +111DC;N # Lo SHARADA HEADSTROKE +111DD..111DF;N # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4;N # No [20] 
SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211;N # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B;N # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E;N # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231;N # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233;N # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234;N # Mn KHOJKI SIGN ANUSVARA +11235;N # Mc KHOJKI SIGN VIRAMA +11236..11237;N # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +11238..1123D;N # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123E;N # Mn KHOJKI SIGN SUKUN +1123F..11240;N # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241;N # Mn KHOJKI VOWEL SIGN VOCALIC R +11280..11286;N # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288;N # Lo MULTANI LETTER GHA +1128A..1128D;N # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D;N # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8;N # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9;N # Po MULTANI SECTION MARK +112B0..112DE;N # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF;N # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2;N # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA;N # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9;N # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301;N # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303;N # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C;N # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310;N # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328;N # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330;N # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333;N # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339;N # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C;N # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D;N # Lo GRANTHA SIGN 
AVAGRAHA +1133E..1133F;N # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340;N # Mn GRANTHA VOWEL SIGN II +11341..11344;N # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348;N # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D;N # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350;N # Lo GRANTHA OM +11357;N # Mc GRANTHA AU LENGTH MARK +1135D..11361;N # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363;N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C;N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374;N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11400..11434;N # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437;N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F;N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441;N # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444;N # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445;N # Mc NEWA SIGN VISARGA +11446;N # Mn NEWA SIGN NUKTA +11447..1144A;N # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F;N # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459;N # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B;N # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D;N # Po NEWA INSERTION SIGN +1145E;N # Mn NEWA SANDHI MARK +1145F..11461;N # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF;N # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2;N # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8;N # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9;N # Mc TIRHUTA VOWEL SIGN E +114BA;N # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE;N # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0;N # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1;N # Mc TIRHUTA SIGN VISARGA +114C2..114C3;N # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA 
+114C4..114C5;N # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6;N # Po TIRHUTA ABBREVIATION SIGN +114C7;N # Lo TIRHUTA OM +114D0..114D9;N # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE;N # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1;N # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5;N # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB;N # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD;N # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE;N # Mc SIDDHAM SIGN VISARGA +115BF..115C0;N # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115C1..115D7;N # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB;N # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD;N # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F;N # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632;N # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A;N # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C;N # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D;N # Mn MODI SIGN ANUSVARA +1163E;N # Mc MODI SIGN VISARGA +1163F..11640;N # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11641..11643;N # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644;N # Lo MODI SIGN HUVA +11650..11659;N # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11660..1166C;N # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11680..116AA;N # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB;N # Mn TAKRI SIGN ANUSVARA +116AC;N # Mc TAKRI SIGN VISARGA +116AD;N # Mn TAKRI VOWEL SIGN AA +116AE..116AF;N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5;N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6;N # Mc TAKRI SIGN VIRAMA +116B7;N # Mn TAKRI SIGN NUKTA +116B8;N # Lo TAKRI LETTER ARCHAIC KHA +116B9;N # Po TAKRI ABBREVIATION SIGN 
+116C0..116C9;N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A;N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F;N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721;N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725;N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726;N # Mc AHOM VOWEL SIGN E +11727..1172B;N # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739;N # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B;N # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E;N # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F;N # So AHOM SYMBOL VI +11740..11746;N # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B;N # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E;N # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837;N # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838;N # Mc DOGRA SIGN VISARGA +11839..1183A;N # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1183B;N # Po DOGRA ABBREVIATION SIGN +118A0..118DF;N # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9;N # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2;N # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF;N # Lo WARANG CITI OM +11900..11906;N # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E +11909;N # Lo DIVES AKURU LETTER O +1190C..11913;N # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916;N # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F;N # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935;N # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938;N # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C;N # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D;N # Mc DIVES AKURU SIGN HALANTA +1193E;N # Mn DIVES AKURU VIRAMA +1193F;N # Lo DIVES AKURU PREFIXED NASAL SIGN +11940;N # Mc DIVES AKURU 
MEDIAL YA +11941;N # Lo DIVES AKURU INITIAL RA +11942;N # Mc DIVES AKURU MEDIAL RA +11943;N # Mn DIVES AKURU SIGN NUKTA +11944..11946;N # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959;N # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7;N # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0;N # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3;N # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7;N # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB;N # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF;N # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0;N # Mn NANDINAGARI SIGN VIRAMA +119E1;N # Lo NANDINAGARI SIGN AVAGRAHA +119E2;N # Po NANDINAGARI SIGN SIDDHAM +119E3;N # Lo NANDINAGARI HEADSTROKE +119E4;N # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00;N # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A;N # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32;N # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38;N # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39;N # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A;N # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E;N # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A3F..11A46;N # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A47;N # Mn ZANABAZAR SQUARE SUBJOINER +11A50;N # Lo SOYOMBO LETTER A +11A51..11A56;N # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58;N # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B;N # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89;N # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96;N # Mn [13] SOYOMBO FINAL CONSONANT SIGN 
G..SOYOMBO SIGN ANUSVARA +11A97;N # Mc SOYOMBO SIGN VISARGA +11A98..11A99;N # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9A..11A9C;N # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D;N # Lo SOYOMBO MARK PLUTA +11A9E..11AA2;N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11ABF;N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA +11AC0..11AF8;N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11B00..11B09;N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11C00..11C08;N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E;N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F;N # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36;N # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D;N # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E;N # Mc BHAIKSUKI SIGN VISARGA +11C3F;N # Mn BHAIKSUKI SIGN VIRAMA +11C40;N # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45;N # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59;N # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C;N # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71;N # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F;N # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7;N # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9;N # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0;N # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1;N # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3;N # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4;N # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6;N # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06;N # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09;N # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30;N # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36;N # Mn [6] 
MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A;N # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D;N # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45;N # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46;N # Lo MASARAM GONDI REPHA +11D47;N # Mn MASARAM GONDI RA-KARA +11D50..11D59;N # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65;N # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68;N # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89;N # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E;N # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91;N # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94;N # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95;N # Mn GUNJALA GONDI SIGN ANUSVARA +11D96;N # Mc GUNJALA GONDI SIGN VISARGA +11D97;N # Mn GUNJALA GONDI VIRAMA +11D98;N # Lo GUNJALA GONDI OM +11DA0..11DA9;N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2;N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4;N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6;N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8;N # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F00..11F01;N # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02;N # Lo KAWI SIGN REPHA +11F03;N # Mc KAWI SIGN VISARGA +11F04..11F10;N # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33;N # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35;N # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A;N # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F;N # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40;N # Mn KAWI VOWEL SIGN EU +11F41;N # Mc KAWI SIGN KILLER +11F42;N # Mn KAWI CONJOINER +11F43..11F4F;N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11F50..11F59;N # Nd 
[10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11FB0;N # Lo LISU LETTER YHA +11FC0..11FD4;N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FD5..11FDC;N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FDD..11FE0;N # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +11FE1..11FF1;N # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +11FFF;N # Po TAMIL PUNCTUATION END OF TEXT +12000..12399;N # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E;N # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474;N # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543;N # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0;N # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2;N # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342F;N # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13430..1343F;N # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13440;N # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13441..13446;N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455;N # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +14400..14646;N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38;N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E;N # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69;N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F;N # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE;N # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9;N # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED;N # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4;N # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH 
COMBINING HIGH-LOW TONE +16AF5;N # Po BASSA VAH FULL STOP +16B00..16B2F;N # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36;N # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B37..16B3B;N # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F;N # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43;N # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44;N # Po PAHAWH HMONG SIGN XAUS +16B45;N # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59;N # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61;N # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77;N # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F;N # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F;N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96;N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A;N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A;N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F;N # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50;N # Lo MIAO LETTER NASALIZATION +16F51..16F87;N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92;N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F;N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1;W # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE2;W # Po OLD CHINESE HOOK MARK +16FE3;W # Lm OLD CHINESE ITERATION MARK +16FE4;W # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1;W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7;W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18AFF;W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 +18B00..18CD5;W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08;W 
# Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3;W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB;W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE;W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B0FF;W # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 +1B100..1B122;W # Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU +1B132;W # Lo HIRAGANA LETTER SMALL KO +1B150..1B152;W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155;W # Lo KATAKANA LETTER SMALL KO +1B164..1B167;W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB;W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A;N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C;N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88;N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99;N # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C;N # So DUPLOYAN SIGN O WITH CROSS +1BC9D..1BC9E;N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BC9F;N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1BCA0..1BCA3;N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF00..1CF2D;N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46;N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1CF50..1CFC3;N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5;N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126;N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164;N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166;N # Mc [2] 
MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169;N # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16A..1D16C;N # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172;N # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D173..1D17A;N # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D17B..1D182;N # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D183..1D184;N # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D185..1D18B;N # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D18C..1D1A9;N # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AA..1D1AD;N # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D1AE..1D1EA;N # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D200..1D241;N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D242..1D244;N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D245;N # So GREEK MUSICAL LEIMMA +1D2C0..1D2D3;N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN +1D2E0..1D2F3;N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D300..1D356;N # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D378;N # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454;N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C;N # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F;N # Lu [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2;N # Lu MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6;N # Lu [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC;N # Lu [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9;N # L& [12] MATHEMATICAL SCRIPT CAPITAL 
S..MATHEMATICAL SCRIPT SMALL D +1D4BB;N # Ll MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3;N # Ll [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505;N # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A;N # Lu [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514;N # Lu [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C;N # Lu [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539;N # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E;N # Lu [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544;N # Lu [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546;N # Lu MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550;N # Lu [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5;N # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0;N # Lu [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1;N # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA;N # Ll [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB;N # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA;N # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB;N # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714;N # Ll [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715;N # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734;N # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735;N # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E;N # Ll [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F;N # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E;N # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD 
CAPITAL OMEGA +1D76F;N # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788;N # Ll [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789;N # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8;N # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9;N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2;N # Ll [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3;N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB;N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF;N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1D800..1D9FF;N # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA00..1DA36;N # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA37..1DA3A;N # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA3B..1DA6C;N # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA6D..1DA74;N # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA75;N # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA76..1DA83;N # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA84;N # Mn SIGNWRITING LOCATION HEAD NECK +1DA85..1DA86;N # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B;N # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DA9B..1DA9F;N # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF;N # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DF00..1DF09;N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A;N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E;N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL 
LETTER S WITH CURL +1DF25..1DF2A;N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E000..1E006;N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018;N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021;N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024;N # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A;N # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D;N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F;N # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E100..1E12C;N # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136;N # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D;N # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149;N # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E;N # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F;N # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD;N # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE;N # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB;N # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF;N # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9;N # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E2FF;N # Sc WANCHO NGUN SIGN +1E4D0..1E4EA;N # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB;N # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF;N # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9;N # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E7E0..1E7E6;N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB;N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE;N # 
Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE;N # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4;N # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF;N # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E8D0..1E8D6;N # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943;N # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A;N # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B;N # Lm ADLAM NASALIZATION MARK +1E950..1E959;N # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F;N # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1EC71..1ECAB;N # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC;N # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF;N # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0;N # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4;N # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D;N # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E;N # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D;N # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1EE00..1EE03;N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F;N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22;N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24;N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27;N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32;N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37;N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39;N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B;N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42;N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47;N # Lo ARABIC 
MATHEMATICAL TAILED HAH +1EE49;N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B;N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F;N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52;N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54;N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57;N # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59;N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B;N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D;N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F;N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62;N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64;N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A;N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72;N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77;N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C;N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E;N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89;N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B;N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3;N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9;N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB;N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1;N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F003;N # So [4] MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND +1F004;W # So MAHJONG TILE RED DRAGON +1F005..1F02B;N # So [39] MAHJONG TILE GREEN DRAGON..MAHJONG TILE BACK +1F030..1F093;N # So [100] DOMINO 
TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE;N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF;N # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CE;N # So [14] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD KING OF DIAMONDS +1F0CF;W # So PLAYING CARD BLACK JOKER +1F0D1..1F0F5;N # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F100..1F10A;A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1F10B..1F10C;N # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F;N # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH +1F110..1F12D;A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD +1F12E..1F12F;N # So [2] CIRCLED WZ..COPYLEFT SYMBOL +1F130..1F169;A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A..1F16F;N # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F170..1F18D;A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA +1F18E;W # So NEGATIVE SQUARED AB +1F18F..1F190;A # So [2] NEGATIVE SQUARED WC..SQUARE DJ +1F191..1F19A;W # So [10] SQUARED CL..SQUARED VS +1F19B..1F1AC;A # So [18] SQUARED THREE D..SQUARED VOD +1F1AD;N # So MASK WORK SYMBOL +1F1E6..1F1FF;N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z +1F200..1F202;W # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA +1F210..1F23B;W # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248;W # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251;W # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265;W # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F320;W # So [33] CYCLONE..SHOOTING STAR +1F321..1F32C;N # So [12] THERMOMETER..WIND BLOWING FACE +1F32D..1F335;W # So [9] HOT DOG..CACTUS +1F336;N # So HOT PEPPER +1F337..1F37C;W # 
So [70] TULIP..BABY BOTTLE +1F37D;N # So FORK AND KNIFE WITH PLATE +1F37E..1F393;W # So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP +1F394..1F39F;N # So [12] HEART WITH TIP ON THE LEFT..ADMISSION TICKETS +1F3A0..1F3CA;W # So [43] CAROUSEL HORSE..SWIMMER +1F3CB..1F3CE;N # So [4] WEIGHT LIFTER..RACING CAR +1F3CF..1F3D3;W # So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL +1F3D4..1F3DF;N # So [12] SNOW CAPPED MOUNTAIN..STADIUM +1F3E0..1F3F0;W # So [17] HOUSE BUILDING..EUROPEAN CASTLE +1F3F1..1F3F3;N # So [3] WHITE PENNANT..WAVING WHITE FLAG +1F3F4;W # So WAVING BLACK FLAG +1F3F5..1F3F7;N # So [3] ROSETTE..LABEL +1F3F8..1F3FA;W # So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA +1F3FB..1F3FF;W # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F43E;W # So [63] RAT..PAW PRINTS +1F43F;N # So CHIPMUNK +1F440;W # So EYES +1F441;N # So EYE +1F442..1F4FC;W # So [187] EAR..VIDEOCASSETTE +1F4FD..1F4FE;N # So [2] FILM PROJECTOR..PORTABLE STEREO +1F4FF..1F53D;W # So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE +1F53E..1F54A;N # So [13] LOWER RIGHT SHADOWED WHITE CIRCLE..DOVE OF PEACE +1F54B..1F54E;W # So [4] KAABA..MENORAH WITH NINE BRANCHES +1F54F;N # So BOWL OF HYGIEIA +1F550..1F567;W # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY +1F568..1F579;N # So [18] RIGHT SPEAKER..JOYSTICK +1F57A;W # So MAN DANCING +1F57B..1F594;N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND +1F595..1F596;W # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS +1F597..1F5A3;N # So [13] WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX +1F5A4;W # So BLACK HEART +1F5A5..1F5FA;N # So [86] DESKTOP COMPUTER..WORLD MAP +1F5FB..1F5FF;W # So [5] MOUNT FUJI..MOYAI +1F600..1F64F;W # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS +1F650..1F67F;N # So [48] NORTH WEST POINTING LEAF..REVERSE CHECKER BOARD +1F680..1F6C5;W # So [70] ROCKET..LEFT LUGGAGE 
+1F6C6..1F6CB;N # So [6] TRIANGLE WITH ROUNDED CORNERS..COUCH AND LAMP +1F6CC;W # So SLEEPING ACCOMMODATION +1F6CD..1F6CF;N # So [3] SHOPPING BAGS..BED +1F6D0..1F6D2;W # So [3] PLACE OF WORSHIP..SHOPPING TROLLEY +1F6D3..1F6D4;N # So [2] STUPA..PAGODA +1F6D5..1F6D7;W # So [3] HINDU TEMPLE..ELEVATOR +1F6DC..1F6DF;W # So [4] WIRELESS..RING BUOY +1F6E0..1F6EA;N # So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE +1F6EB..1F6EC;W # So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING +1F6F0..1F6F3;N # So [4] SATELLITE..PASSENGER SHIP +1F6F4..1F6FC;W # So [9] SCOOTER..ROLLER SKATE +1F700..1F776;N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F77F;N # So [5] HAUMEA..ORCUS +1F780..1F7D9;N # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR +1F7E0..1F7EB;W # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0;W # So HEAVY EQUALS SIGN +1F800..1F80B;N # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847;N # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859;N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887;N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD;N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1;N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1F90B;N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT +1F90C..1F93A;W # So [47] PINCHED FINGERS..FENCER +1F93B;N # So MODERN PENTATHLON +1F93C..1F945;W # So [10] WRESTLERS..GOAL NET +1F946;N # So RIFLE +1F947..1F9FF;W # So [185] FIRST PLACE MEDAL..NAZAR AMULET +1FA00..1FA53;N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D;N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA7C;W # So [13] BALLET SHOES..CRUTCH 
+1FA80..1FA88;W # So [9] YO-YO..FLUTE +1FA90..1FABD;W # So [46] RINGED PLANET..WING +1FABF..1FAC5;W # So [7] GOOSE..PERSON WITH CROWN +1FACE..1FADB;W # So [14] MOOSE..PEA POD +1FAE0..1FAE8;W # So [9] MELTING FACE..SHAKING FACE +1FAF0..1FAF8;W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND +1FB00..1FB92;N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA;N # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FBF0..1FBF9;N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF;W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A6E0..2A6FF;W # Cn [32] .. +2A700..2B739;W # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B73A..2B73F;W # Cn [6] .. +2B740..2B81D;W # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B81E..2B81F;W # Cn [2] .. +2B820..2CEA1;W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEA2..2CEAF;W # Cn [14] .. +2CEB0..2EBE0;W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBE1..2F7FF;W # Cn [3103] .. +2F800..2FA1D;W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +2FA1E..2FA1F;W # Cn [2] .. +2FA20..2FFFD;W # Cn [1502] .. +30000..3134A;W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +3134B..3134F;W # Cn [5] .. +31350..323AF;W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +323B0..3FFFD;W # Cn [56398] .. +E0001;N # Cf LANGUAGE TAG +E0020..E007F;N # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF;A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 +F0000..FFFFD;A # Co [65534] .. +100000..10FFFD;A # Co [65534] .. 
+ +# EOF diff --git a/libcxx/utils/data/unicode/README.txt b/libcxx/utils/data/unicode/README.txt --- a/libcxx/utils/data/unicode/README.txt +++ b/libcxx/utils/data/unicode/README.txt @@ -8,7 +8,8 @@ https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt \ https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt \ https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt \ - https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt + https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt \ + https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt Afterwards build the `libcxx-generate-files` target to update the generated Unicode files. @@ -32,3 +33,7 @@ DerivedGeneralCategory.txt Source: https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt Usage: libcxx/utils/generate_escaped_output_table.py + +EastAsianWidth.txt +Source: https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt +Usage: libcxx/utils/generate_width_estimation_table.py diff --git a/libcxx/utils/generate_width_estimation_table.py b/libcxx/utils/generate_width_estimation_table.py new file mode 100644 --- /dev/null +++ b/libcxx/utils/generate_width_estimation_table.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +# The code is based on +# https://github.com/microsoft/STL/blob/main/tools/unicode_properties_parse/grapheme_break_property_data_gen.py +# +# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from io import StringIO
from pathlib import Path
from dataclasses import dataclass, field, replace
from typing import Optional
import re
import sys


@dataclass
class PropertyRange:
    # Inclusive code point bounds of the range; -1 means "not set".
    lower: int = -1
    upper: int = -1
    # The East_Asian_Width value parsed from the data file. After
    # compactPropertyRanges() it holds the space-separated properties of
    # every range that was merged into this one.
    prop: Optional[str] = None


@dataclass
class Entry:
    # First code point of the range.
    lower: int = -1
    # Distance from lower to the (inclusive) last code point of the range.
    offset: int = -1


# Matches a data line of EastAsianWidth.txt:
#   <lower>[..<upper>] ; <prop>
# The group names are used by parsePropertyLine().
LINE_REGEX = re.compile(
    r"^(?P<lower>[0-9A-F]{4,6})(?:\.\.(?P<upper>[0-9A-F]{4,6}))?\s*;\s*(?P<prop>\w+)"
)

# Ranges that [format.string.std]/12 gives an estimated width of 2
# regardless of their East_Asian_Width property (inclusive bounds).
_HARDCODED_WIDE_RANGES = (
    (0x4DC0, 0x4DFF),  # Yijing Hexagram Symbols
    (0x1F300, 0x1F5FF),  # Miscellaneous Symbols and Pictographs
    (0x1F900, 0x1F9FF),  # Supplemental Symbols and Pictographs
)


def filterProperty(element: PropertyRange) -> Optional[PropertyRange]:
    """
    Returns element when it has an estimated width of 2, else None.

    An element is kept when its property is "W" or "F", or when it lies
    completely inside one of the hardcoded ranges. The return value is
    only used for its truthiness by filter().
    """
    ### Matches property predicate?
    if element.prop in ("W", "F"):
        return element

    ### Matches hardcode ranges predicate?
    for first, last in _HARDCODED_WIDE_RANGES:
        if element.lower >= first and element.upper <= last:
            return element

    return None


def parsePropertyLine(inputLine: str) -> Optional[PropertyRange]:
    """
    Parses one line of EastAsianWidth.txt.

    Returns the parsed range, or None when the line is not a data line
    (comments and empty lines). A single code point yields a range where
    lower == upper.
    """
    m = LINE_REGEX.match(inputLine)
    if m is None:
        return None

    lower_str, upper_str, prop = m.group("lower", "upper", "prop")
    lower = int(lower_str, base=16)
    upper = lower if upper_str is None else int(upper_str, base=16)
    return PropertyRange(lower, upper, prop)


def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]:
    """
    Merges overlapping and consecutive ranges to one range.

    The input is expected to be sorted by its lower bound.

    Since the input properties are filtered the exact property isn't
    interesting anymore. The properties in the output are merged to aid
    debugging.
    Merging the ranges results in fewer ranges in the output table,
    reducing binary size and improving lookup performance.

    The elements of the input are not modified; the result contains copies.
    """
    result: list[PropertyRange] = []
    for x in input:
        if (
            result
            and x.lower > result[-1].lower
            and x.lower <= result[-1].upper + 1
        ):
            last = result[-1]
            last.upper = max(last.upper, x.upper)
            last.prop += f" {x.prop}"
            continue
        # Copy so extending the merged range never alters the caller's data.
        result.append(replace(x))
    return result


DATA_ARRAY_TEMPLATE = """
/// The entries of the characters with an estimated width of 2.
///
/// Contains the entries for [format.string.std]/12
///  -  Any code point with the East_Asian_Width="W" or East_Asian_Width="F"
///     Derived Extracted Property as described by UAX #44
/// - U+4DC0 - U+4DFF (Yijing Hexagram Symbols)
/// - U+1F300 - U+1F5FF (Miscellaneous Symbols and Pictographs)
/// - U+1F900 - U+1F9FF (Supplemental Symbols and Pictographs)
///
/// The data is generated from
/// - https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
/// - The "overrides" in [format.string.std]/12
///
/// The format of EastAsianWidth.txt is two fields separated by a semicolon.
/// Field 0: Unicode code point value or range of code point values
/// Field 1: East_Asian_Width property, consisting of one of the following values:
///         "A", "F", "H", "N", "Na", "W"
///  - All code points, assigned or unassigned, that are not listed
///      explicitly are given the value "N".
///  - The unassigned code points in the following blocks default to "W":
///         CJK Unified Ideographs Extension A: U+3400..U+4DBF
///         CJK Unified Ideographs:             U+4E00..U+9FFF
///         CJK Compatibility Ideographs:       U+F900..U+FAFF
///  - All undesignated code points in Planes 2 and 3, whether inside or
///      outside of allocated blocks, default to "W":
///         Plane 2:                            U+20000..U+2FFFD
///         Plane 3:                            U+30000..U+3FFFD
///
/// The table is similar to the table
///  __extended_grapheme_custer_property_boundary::__entries
/// which explains the details of these classes. The only difference is this
/// table lacks a property, thus having more bits available for the size.
///
/// The maximum code point that has an estimated width of 2 is U+3FFFD. This
/// value can be encoded in 18 bits. Thus the upper 3 bits of the code point
/// are always 0. These 3 bits are used to enlarge the offset range. This
/// optimization reduces the table in Unicode 15 from 184 to 104 entries,
/// saving 320 bytes.
///
/// The data has 2 values:
/// - bits [0, 13] The size of the range, allowing 16384 elements.
/// - bits [14, 31] The lower bound code point of the range. The upper bound of
///   the range is lower bound + size.
inline constexpr uint32_t __entries[{size}] = {{
{entries}}};

/// The upper bound entry of EastAsianWidth.txt.
///
/// Values greater than this value may have more than 18 significant bits.
/// They always have a width of 1. This property makes it possible to store
/// the table in its compact form.
inline constexpr uint32_t __table_upper_bound = 0x{upper_bound:08x};

/// Returns the estimated width of a Unicode code point.
///
/// \\pre The code point is a valid Unicode code point.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept {{
  // Since __table_upper_bound contains the unshifted range do the
  // comparison without shifting.
  if (__code_point > __table_upper_bound) [[unlikely]]
    return 1;

  // When the code-point is less than the first element in the table
  // the lookup is quite expensive. Since quite some scripts are in
  // that range, it makes sense to validate that first.
  // The std_format_spec_string_unicode benchmark gives a measurable
  // improvement.
  if (__code_point < (__entries[0] >> 14))
    return 1;

  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries;
  if (__i == 0)
    return 1;

  --__i;
  uint32_t __upper_bound = (__entries[__i] >> 14) + (__entries[__i] & 0x3fffu);
  return 1 + (__code_point <= __upper_bound);
}}
"""

TABLES_HPP_TEMPLATE = """
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// WARNING, this entire header is generated by
// utils/generate_width_estimation_table.py
// DO NOT MODIFY!

// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
//
// See Terms of Use <https://www.unicode.org/copyright.html>
// for definitions of Unicode Inc.'s Data Files and Software.
//
// NOTICE TO USER: Carefully read the following legal agreement.
// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
// TERMS AND CONDITIONS OF THIS AGREEMENT.
// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
// THE DATA FILES OR SOFTWARE.
//
// COPYRIGHT AND PERMISSION NOTICE
//
// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of the Unicode data files and any associated documentation
// (the "Data Files") or Unicode software and any associated documentation
// (the "Software") to deal in the Data Files or Software
// without restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, and/or sell copies of
// the Data Files or Software, and to permit persons to whom the Data Files
// or Software are furnished to do so, provided that either
// (a) this copyright and permission notice appear with all copies
// of the Data Files or Software, or
// (b) this copyright and permission notice appear in associated
// Documentation.
//
// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
//
// Except as contained in this notice, the name of a copyright holder
// shall not be used in advertising or otherwise to promote the sale,
// use or other dealings in these Data Files or Software without prior
// written authorization of the copyright holder.

#ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H
#define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H

#include <__algorithm/ranges_upper_bound.h>
#include <__config>
#include <cstddef>
#include <cstdint>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#  pragma GCC system_header
#endif

_LIBCPP_BEGIN_NAMESPACE_STD

#if _LIBCPP_STD_VER >= 20

namespace __width_estimation_table {{
{content}
}} // namespace __width_estimation_table

#endif //_LIBCPP_STD_VER >= 20

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H"""


def property_ranges_to_table(ranges: list[PropertyRange]) -> list[Entry]:
    """
    Converts the ranges to the entries stored in the __entries table.

    The ranges are processed in ascending order of their lower bound and
    must not overlap. A range that is too large for one entry is stored as
    several consecutive entries. The input ranges are not modified.
    """
    # The maximum value that can be encoded in the available bits in the
    # __entries table.
    upper_bound = 0x3FFFF
    # The maximum offset in an __entries entry. Larger offsets will be
    # split and stored in multiple entries.
    chunk = 16384
    result: list[Entry] = []
    high = -1
    for r in sorted(ranges, key=lambda x: x.lower):
        # Validate overlapping ranges
        assert r.lower > high, "the ranges overlap or contain duplicates"
        high = r.upper
        assert high <= upper_bound, "the code point doesn't fit in an entry"

        lower = r.lower
        # Emit full-size entries until the remainder fits in one entry.
        while r.upper - lower >= chunk:
            result.append(Entry(lower, chunk - 1))
            lower += chunk
        result.append(Entry(lower, r.upper - lower))
    return result


cpp_entrytemplate = "    0x{:08x} /* {:08x} - {:08x} [{:>5}] */"


def generate_cpp_data(ranges: list[PropertyRange], upper_bound: int) -> str:
    """
    Returns the C++ code of the table and its lookup function.

    ranges are the (compacted) ranges with an estimated width of 2 and
    upper_bound is the value written to __table_upper_bound.
    """
    table = property_ranges_to_table(ranges)
    entries = ", //\n".join(
        cpp_entrytemplate.format(
            e.lower << 14 | e.offset,  # the encoded value stored in the table
            e.lower,
            e.lower + e.offset,
            e.offset + 1,  # number of code points in the entry
        )
        for e in table
    )
    return DATA_ARRAY_TEMPLATE.format(
        size=len(table),
        entries=entries,
        upper_bound=upper_bound,
    )


def generate_data_tables() -> str:
    """
    Generate Unicode data for [format.string.std]/12

    Reads data/unicode/EastAsianWidth.txt, located relative to this script.
    """
    east_asian_width_path = (
        Path(__file__).absolute().parent
        / "data"
        / "unicode"
        / "EastAsianWidth.txt"
    )

    with east_asian_width_path.open(encoding="utf-8") as f:
        properties = [
            x
            for line in f
            if (x := parsePropertyLine(line)) and filterProperty(x)
        ]

    # The hardcoded ranges are not (completely) "W" or "F" in the data file:
    # - The range U+4DC0 - U+4DFF is neutral, so based on its property alone
    #   it would not be in the table.
    # - The range U+1F300 - U+1F5FF is partly wide, for example
    #     1F300..1F320;W # So [33] CYCLONE..SHOOTING STAR
    #     1F321..1F32C;N # So [12] THERMOMETER..WIND BLOWING FACE
    #     1F32D..1F335;W # So [9] HOT DOG..CACTUS
    #   Based on the property the first and last ranges would be present,
    #   but the second wouldn't.
    # filterProperty() keeps these ranges anyway. Validate every code point
    # of the hardcoded ranges is indeed present.
    for first, last in _HARDCODED_WIDE_RANGES:
        for code_point in range(first, last + 1):
            assert any(
                x.lower <= code_point <= x.upper for x in properties
            ), f"U+{code_point:04X} is missing from the filtered properties"

    data = compactPropertyRanges(sorted(properties, key=lambda x: x.lower))

    return generate_cpp_data(data, data[-1].upper)


def main(argv: list[str]) -> None:
    """
    Writes the generated header to argv[1], or to stdout when the script
    isn't called with exactly one argument.
    """
    # Generate first so a failure doesn't leave a truncated output file.
    header = TABLES_HPP_TEMPLATE.lstrip().format(content=generate_data_tables())
    if len(argv) == 2:
        with open(argv[1], "w", encoding="utf-8") as output:
            print(header, file=output)
    else:
        print(header)


if __name__ == "__main__":
    main(sys.argv)