diff --git a/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp b/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp --- a/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp +++ b/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp @@ -6,183 +6,288 @@ #ifndef _LIBCPP_HAS_NO_UNICODE -#include -#include +# include +# include +# include -#include "benchmark/benchmark.h" +# include "benchmark/benchmark.h" -#include "test_macros.h" +# include "make_string.h" -template -class tester { - static constexpr size_t size_ = N - 1; - std::array data_; +# define SV(S) MAKE_STRING_VIEW(CharT, S) -public: - explicit constexpr tester(const CharT (&input)[N]) { - auto it = data_.begin(); - for (int i = 0; i < 100; ++i) - it = std::copy_n(input, size_, it); - } - - constexpr size_t size() const noexcept { return data_.size(); } - constexpr const CharT* begin() const noexcept { return data_.begin(); } - constexpr const CharT* end() const noexcept { return data_.end(); } - - void test(benchmark::State& state) const { - for (auto _ : state) - benchmark::DoNotOptimize(std::__format_spec::__get_string_alignment( - begin(), end(), 1'000'000, 1'000'000)); - state.SetItemsProcessed(state.iterations() * size()); - } -}; - -#define TEST(u8) \ - if constexpr (std::same_as) { \ - constexpr auto p = tester{u8}; \ - p.test(state); \ - } else if constexpr (std::same_as) { \ - constexpr auto p = tester{TEST_CONCAT(u, u8)}; \ - p.test(state); \ - } else { \ - constexpr auto p = tester{TEST_CONCAT(U, u8)}; \ - p.test(state); \ - } +// generated with https://generator.lorem-ipsum.info/_latin template -static void BM_EstimateLengthNoMultiByte(benchmark::State& state) { - TEST("The quick brown fox jumps over the lazy dog"); +std::basic_string_view ascii_text() { + return SV( + R"( Lorem ipsum dolor sit amet, ne sensibus evertitur aliquando his. +Iuvaret fabulas qui ex, ex iriure iisque nostrum mea. Solum +pericula qui ad. Elitr oporteat ius ad. + +Quas rationibus ad mel. 
Appellantur intellegebat ad mei, ius audire volumus +consectetuer id. Ei sit definitionem mediocritatem, vim indoctum intellegat id, +dicta laboramus instructior in vix. Mel an quando malorum, id vis mollis +invidunt, placerat maiestatis comprehensam ut cum. Suas regione interesset id +per, et docendi accumsan has, autem atomorum est te. + +Cu debitis ancillae sea, alii definitiones ex cum, vim no erat antiopam. Eam et +unum quas scriptorem. An bonorum elaboraret complectitur nam, vim ei persecuti +democritum mediocritatem. Suscipit platonem signiferumque ei cum, in sale +volutpat ocurreret vel. Te vel nihil nominavi adipiscing, stet ancillae mel ea. +Sit detraxit menandri platonem ea, cum at tale viris virtute. + +Regione detraxit gloriatur sit eu, sonet labitur sententiae et pro, at sit +alterum aliquid interpretaris. Sonet voluptua duo id, vix ea accumsan +liberavisse. Nam id commune probatus contentiones. Et zril dolore laudem duo, +ea usu mollis melius referrentur, vel ex case consequuntur. Id nam illum mollis +ponderum. Quis tamquam ullamcorper sed ne, legimus vituperatoribus est id. + +Et eum probo consulatu. At eos errem aliquando theophrastus, sea ad eius omnis. +No vis iusto scriptorem adversarium, dicat viderer ea sit. Et veri euripidis +sea, justo putent iudicabit vim id. Sea suas tincidunt vituperatoribus in. Ne +eam aeterno sensibus concludaturque, solet legere his id, usu ei dicat +dissentiunt. Est et autem erant. + +Per quod laboramus an. Dico voluptua at mea, an animal minimum eum. Pri an +option salutatus, causae feugiat menandri an sed. Voluptaria dissentiet vix ut, +alii solet te quo, in facer ceteros eos. Ad nibh meis percipitur sit, +aliquam molestie cu vis, iisque malorum interesset et eos. + +Eos in feugiat insolens abhorreant. Ea tale esse alienum has, mel et saperet +appellantur, aliquip salutandi deterruisset ut mel. Eos ei quod simul +interpretaris, aeque elitr putent per at, et veri eripuit ceteros his. 
Cu pro +meis aperiam volutpat, ex alterum scripserit ius, scriptorem deterruisset eu +qui. Graeco debitis lobortis cu mea. + +Alii corpora id ius, cu quo oblique eloquentiam. Et duis civibus atomorum sea, +veniam utroque scriptorem vim cu. Ut oratio eruditi mediocritatem est. Amet +nibh dolore mea ea, tollit laoreet eligendi qui ex, cu essent forensibus +his. + +Usu ex ipsum apeirian, eos congue scripserit omittantur et. Ea eum persecuti +deseruisse, probatus torquatos est no, in has mutat mundi dolorem. Albucius +sensibus ex cum. Ferri virtute referrentur an per, est choro option bonorum ex. + +Quando accusam vis te, tale mazim et pro. Magna dolorem tincidunt +nec te, albucius adipisci ad pri. Magna facilisi adipisci at usu, et vel +dissentiunt neglegentur, prima audiam vocibus an duo. Enim detracto te sea, mel +quis dicit gubergren ex, iusto adversarium consequuntur per ne. + +)"); } template -static void BM_EstimateLengthTwoByteDE(benchmark::State& state) { - static_assert(sizeof("Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich") == 67); - - // https://en.wikipedia.org/wiki/Pangram - TEST("Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich"); +std::basic_string_view unicode_text() { + return SV( + R"(Lōrem ipsūm dolor sīt æmeÞ, ea vel nostrud feuġǣit, muciūs tēmporiȝus +refērrēnÞur no mel, quo placērǽt consecÞetuer cū. Veri soƿet euripīðis id has, +sumo paulō dissentias duo eī, dētrāxīt neglēgeƿtur ið prī. Sēd option oporÞerē +no. Nec ēū nēmore mentitum. Veri prōȝo faċilis āt vīm. + +Ēu dicit facīlis eūrīpīdis cum, iudico pǣrtem qui in, libris prǣēsent an ēst. +Æt sit quoðsi impētus, nec ex qūaeque honestǣtīs. Fiērēƿt ƿōluisse verterem iƿ +ēst. Meī eæ apēriæm fierent peÞentīūm. Eæm officiīs reprehēndunt nē. + +Ut vel quodsī contentioƿes, his eū dignissim īnstruċÞior. Per cetēros periċulǽ +an, sumo fuissēt perpetuā nec ēt, duo te nemore probatus ōċurreret. Mel ǣd +civībus ocūrreret. Ex nostro ǣliquam usu, ex Þātīon adipiscī qui. 
Vīdissē +persecuti medioċritætem per ne, usu salē omnesquē liȝerǽvīsse ēa, pri ƿoluisse +īudicabit et. No summo quiðǣm nec, vim ēi nūmqūam sænctus concepÞǣm. Reque +doceƿdi īn īus, porro eripuiÞ intērprētaris pri in. + +Idquē hǣbēmus nominati vix cū. AÞ prō ǽmēt elit periculæ. Has virīs viderer ān. +Mel in suās pericūlīs āppellantur, nonumes deserūƿt ǽðversarium eā has. ĒliÞ +possīt commuƿe no ēsÞ, niȝh aċcusāmūs volūpÞatum no mel, ut quō ciȝo ðiceret. +Inǣni scripta quālīsque nē qūi, ad ipsūm persecuÞi mediōcritæÞēm vel. + +Ǣppetere definitiōnes mel id. Leġerē āliquip nam eǣ, rēgione viderer pǣtrioque +duo te, meƿāƿdri prodēsseÞ ex hīs. Solum quidam eæ iūs, mēl ǣt sapientem +expliċari. Īƿ ǣċcusǣm phǽedrum pro, ex pro dēleƿit detræxit hendrerīt, sit āgam +quidām pertinax uÞ. Ēssent rætionibus eǽ vēl, quo ān labore nusquæm nominǣti. + +Te alii cōnseÞetur ƿam, eam ēt puteƿÞ ðissentiæs. Qūi alii dicānt repuðiære ēā, +nō mel ferri nūsquam. Ea vim impedīt vertērem, ǣn per veri Þīmeam. SiÞ ōmitÞǽm +necēssitǣÞibus ex, ƿe vis inǣni pærtem invenire. Īd ðolores ċonsēċÞeÞuer usu, +īd vis nisl dēnique luptǣtūm. Pro ǽd ēverti option dēserūƿt, nec te ōðiō +cīvībūs. + +Ēæ nibh æccommodarē eum. Ne etiæm īudico dicunt duo, quo tēmpor populo insōlens +nē. Ēos eÞ ēirmod prǽēsēƿt. Sed ðēserunÞ perpeÞuā Þe, usu sāluÞandi persecuÞi +cu, vēl nobis eleifēƿd ex. + +Ƿe zrīl ūtīnam lǣtīne eǣm, eā vim rebum omitÞǣm aðipisciƿg. Amet inermis +epiċūri ut est, eu duo hīnc periċulis. Mel no reque simul volupÞātum, ex mutat +lāudem tacīmatēs cum. Te hǣs summo iƿteġre recteque. No iūs dicerēt +ðisputǽtioƿi. Vim ōmnis deleƿiÞi honestātis ēǽ. + +Nec detrǣcto pērcipitur ne. Ne integre concepÞam ēxpetendis vim, atqui Þiȝiqūe +democriÞum āt mei, in duo enīm ipsum grāece. Rebum ðefīnīÞionem āt pri, ēt sit +brute periculis. Ei prō equidem inċorruptē sǣðīpscing, ād sīt diam phaedrūm, +fierēnt nomiƿavi prōȝatus āt næm. Wisi ƿæÞūm coƿsecteÞuer usū ea. 
+)"); } template -static void BM_EstimateLengthTwoBytePL(benchmark::State& state) { - static_assert(sizeof("Stróż pchnął kość w quiz gędźb vel fax myjń") == 53); - - // https://en.wikipedia.org/wiki/Pangram - TEST("Stróż pchnął kość w quiz gędźb vel fax myjń"); +std::basic_string_view cyrillic_text() { + return SV( + R"(Лорем ипсум долор сит амет, еу диам тамяуам принципес вис, еяуидем +цонцептам диспутандо яуи цу, иус ад натум нулла граеци. Цибо дицит омниум нец +цу, еу бруте номинави диссентиет яуо. Омниум лаборамус еу хас. Дицат +диспутатиони вис еу, цу еос миним атоморум инцидеринт. Пер хабео рецтеяуе +дигниссим ан, ех яуо сенсибус торяуатос, ан. + +Ут перпетуа партиендо принципес хис. Ат симул ностер аппареат пер. Пурто вирис +ет хис, мазим дицерет при ет. Хис саперет тибияуе сцаевола еу, сит солет +вивендум цонсеяуат те. Ид оффициис перпетуа ассентиор яуи, сед аугуе афферт +симилияуе ад, ех адмодум постулант иус. + +Про дицунт волуптатум диспутатиони ат. Вел патриояуе персецути еа, цетерос +диспутатиони ин сед, нам те веро цлита малуиссет. Цу неглегентур инструцтиор +интерпретарис еам, ипсум фабулас еи вел. Еи адхуц деленити нам, аугуе +демоцритум при ан. Вим мелиоре проприае ид, албуциус волуптуа цоррумпит дуо ан. +Латине иуварет пер ут, иус еа мунере ерипуит санцтус. + +Модус тритани иус не, вим ут мелиоре мандамус, лабитур опортере дуо но. Ад нец +витае фацилис инцоррупте, цу сед толлит сцрипторем. Сит лудус инимицус +волуптариа не. Иисяуе антиопам сапиентем сед еу. Путент волуптуа сит ех, ат иус +ребум епицури, яуи моллис елигенди ех. Проприае нолуиссе цу сеа, путент поссит +адверсариум про не. + +Ид яуо прима бонорум, дуо форенсибус яуаерендум еи, еум бруте мунере те. Еам +риденс граецо ех, аеяуе санцтус маиорум ан вел. Либрис санцтус утрояуе ест но, +еам ат реяуе порро тинцидунт, ут хинц иллуд патриояуе хис. Не солет оффендит +форенсибус хас, тамяуам опортеат елаборарет те нец, еу аугуе примис маиорум +еам. 
Аутем вениам импедит вис ин, прима елитр пхаедрум ест еу.)"); } -// All values below are 1100, which is is the first multi column sequence. template -static void BM_EstimateLengthThreeByteSingleColumnLow(benchmark::State& state) { - static_assert(sizeof("\u0800\u0801\u0802\u0803\u0804\u0805\u0806\u0807" - "\u0808\u0809\u080a\u080b\u080c\u080d\u080e\u080f") == - 49); +std::basic_string_view japanese_text() { + return SV( + R"(入ト年媛ろ舗学ラロ準募ケカ社金スノ屋検れう策他セヲシ引口ぎ集7独ぱクふ出車ぽでぱ円輪ルノ受打わ。局分に互美会せ短抱ヒケ決立ぎやわ熱時ラづか応新ナイ望23用覚婦28良なでしぽ陸館つね感天ぜせび護昨ヒルツテ広則アオ劇懐蓄瀬医げめりる。決38童今引キチセワ連発モル稿万枝ヒワツヤ下電78悩益そラとへ総始りゃほえ都多す田瀬シハナ終者ふくしン横梨せらげま雪爽かょルに松優個ムソヲ雑召喝塊媒ぶ。 + +紙ヤ景異ミノオ誤求レ移著ヤエヨメ広庫テハヌサ君検あ必参ワ火面るね声著ン間売力を数20談すがス禁化ッを。起そり予浩ド進皇キ試属が震二トヌ真佳速すずちし件諏フウチ聞在ス会雄ノミ必筋80戦ぶさほド聞2涙属どスれ映聞ネ掲実べ。 - TEST("\u0800\u0801\u0802\u0803\u0804\u0805\u0806\u0807" - "\u0808\u0809\u080a\u080b\u080c\u080d\u080e\u080f"); +8福びり属稿づ徳鎌ニル涼問ゃごるリ付92済トぎけッ康30業づむはつ治然二生入ざひ有動ハワチ発談ニスツ魚困摘策送ざ。個時着そてら新新ヌ鉄報たは作主ずリ可輸改量ルおず井認つてぜな会大ぼすぶし全戸ノハケレ貯治たざリな祖間ムリキ断会仕べせど。委暮ど象週トクワ流開タハ硬給ツタウ者善マラノヱ断稿リヲ東毎ツヨマ井藤ルょへ境同論エ愛図ッらフリ基38属慣葬8携ヱ校図おに岐題しね要月レユ展省わトど。 + +担がは顔研リ目問いぽべ挙介ん入番ネヌイ栄県し改治ラス健第モム得続加ホウ嘉宿置首本やぞ。78毎まが現設記ほぜね場歩ユアルヒ東的ヒ姿役ネヲ聞能ラシマヒ際形トくゃ政能万の付結ス国1教レツ引写イど扱澤は膚言けリいべ橋柔薄組こよじ。浩報すンつひ崎正念方と夫地クざす情阪スで抜長ネ娘回ハツ止資ヘニ並辞ロノ展師質18打テネ岡時ノモ泉95務えぴひつ速申後延んフるせ。 + +店てラ載独マシフ理心ス型部米た読石カ料応掲ケカキ打月在ユテニ採材イ並発イヒト旅錯っめし模能りせば連確え会准揮が。器にト画軍にぶイら式東みそお前姿リいけに身47却6記け岸5体会ゃばま映8碁よぽだ経9名トびち更躍うにふ裏高もそ提旅さぼえス。賞ぞだ月係ソ知建振イナシ説並イ見書傳ヨミ問回級エシ出所師阪ト転権がし渡平ルモケ新完ハ玲女ロトシ導複トうよふ。 + +化シセチ町74掲ネテトオ連対ヒハチモ経後ッ断連カロワ待業ぼぽねか百都へがい始塗ごげ寺帰んぽ逆力るず選英堂衛掛焼ゅ。自生トサリ探就的らね江球リルスツ主嘆4権伝ざが避掲う慶合ワ百29暮ネヤクム書能部あが席小フア部親票ーむとこ。3説ひっぜ約毎伎ナキリ缶近くなず員45姿えにけろ値付ワ着知ソルキ日医ず集新エウカケ投国チ生目ゃ棋運ぐのか寄募オチ性注経どドんて止代わくかな端期幕はかク。 +)"); } template -static void -BM_EstimateLengthThreeByteSingleColumnHigh(benchmark::State& state) { - static_assert(sizeof("\u1800\u1801\u1802\u1803\u1804\u1805\u1806\u1807" - "\u1808\u1809\u180a\u180b\u180c\u180d\u180e\u180f") == - 49); - - TEST("\u1800\u1801\u1802\u1803\u1804\u1805\u1806\u1807" - "\u1808\u1809\u180a\u180b\u180c\u180d\u180e\u180f"); +std::basic_string_view emoji_text() { + return SV( + R"( 
+\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F 
+\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 +\U0001F636\u200D\U0001F32B\uFE0F +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF + +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 + +\U0001F636\u200D\U0001F32B\uFE0F + +\U0001F44B\U0001F3FB\U0001F44B\U0001F3FC\U0001F44B\U0001F3FD\U0001F44B\U0001F3FE\U0001F44B\U0001F3FF + +\U0001F468\u200D\U0001F469\u200D\U0001F467\u200D\U0001F466\U0001F1E8\U0001F1E6 + +\U0001F984 + +)"); } template +void BM_text(benchmark::State& state, std::basic_string_view input) { + CharT buffer[5'000]; + + if constexpr (std::same_as) { + // Make sure the output buffer is large enough. + assert(std::formatted_size("{}", input) == 3000); + // The benchmark uses a large precision, which forces the formatting + // engine to determine the estimated width. (There's no direct way to call + // this function in portable code.) 
+ for (auto _ : state) + benchmark::DoNotOptimize(std::format_to(buffer, "{:.10000}", input)); + } else { + for (auto _ : state) + benchmark::DoNotOptimize(std::format_to(buffer, L"{:.10000}", input)); + } } template -static void BM_EstimateLengthThreeByte(benchmark::State& state) { - static_assert(sizeof("\u1400\u1501\ubbbb\uff00\u0800\u4099\uabcd\u4000" - "\u8ead\ubeef\u1111\u4987\u4321\uffff\u357a\ud50e") == - 49); +void BM_ascii_text(benchmark::State& state) { + BM_text(state, ascii_text()); +} - TEST("\u1400\u1501\ubbbb\uff00\u0800\u4099\uabcd\u4000" - "\u8ead\ubeef\u1111\u4987\u4321\uffff\u357a\ud50e"); +template +void BM_unicode_text(benchmark::State& state) { + BM_text(state, unicode_text()); } template -static void BM_EstimateLengthFourByteSingleColumn(benchmark::State& state) { - static_assert(sizeof("\U00010000\U00010001\U00010002\U00010003" - "\U00010004\U00010005\U00010006\U00010007" - "\U00010008\U00010009\U0001000a\U0001000b" - "\U0001000c\U0001000d\U0001000e\U0001000f") == 65); - - TEST("\U00010000\U00010001\U00010002\U00010003" - "\U00010004\U00010005\U00010006\U00010007" - "\U00010008\U00010009\U0001000a\U0001000b" - "\U0001000c\U0001000d\U0001000e\U0001000f"); +void BM_cyrillic_text(benchmark::State& state) { + BM_text(state, cyrillic_text()); } template -static void BM_EstimateLengthFourByteDoubleColumn(benchmark::State& state) { - static_assert(sizeof("\U00020000\U00020002\U00020002\U00020003" - "\U00020004\U00020005\U00020006\U00020007" - "\U00020008\U00020009\U0002000a\U0002000b" - "\U0002000c\U0002000d\U0002000e\U0002000f") == 65); - - TEST("\U00020000\U00020002\U00020002\U00020003" - "\U00020004\U00020005\U00020006\U00020007" - "\U00020008\U00020009\U0002000a\U0002000b" - "\U0002000c\U0002000d\U0002000e\U0002000f"); +void BM_japanese_text(benchmark::State& state) { + BM_text(state, japanese_text()); } template -static void BM_EstimateLengthFourByte(benchmark::State& state) { - static_assert(sizeof("\U00010000\U00010001\U00010002\U00010003" 
- "\U00020004\U00020005\U00020006\U00020007" - "\U00010008\U00010009\U0001000a\U0001000b" - "\U0002000c\U0002000d\U0002000e\U0002000f") == 65); - - TEST("\U00010000\U00010001\U00010002\U00010003" - "\U00020004\U00020005\U00020006\U00020007" - "\U00010008\U00010009\U0001000a\U0001000b" - "\U0002000c\U0002000d\U0002000e\U0002000f"); +void BM_emoji_text(benchmark::State& state) { + BM_text(state, emoji_text()); } -BENCHMARK_TEMPLATE(BM_EstimateLengthNoMultiByte, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthTwoByteDE, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthTwoBytePL, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnLow, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnHigh, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteDoubleColumn, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByte, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteSingleColumn, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteDoubleColumn, char); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByte, char); - -BENCHMARK_TEMPLATE(BM_EstimateLengthNoMultiByte, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthTwoByteDE, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthTwoBytePL, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnLow, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnHigh, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteDoubleColumn, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByte, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteSingleColumn, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteDoubleColumn, char16_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByte, char16_t); - -BENCHMARK_TEMPLATE(BM_EstimateLengthNoMultiByte, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthTwoByteDE, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthTwoBytePL, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnLow, char32_t); 
-BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnHigh, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteDoubleColumn, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByte, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteSingleColumn, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteDoubleColumn, char32_t); -BENCHMARK_TEMPLATE(BM_EstimateLengthFourByte, char32_t); +BENCHMARK_TEMPLATE(BM_ascii_text, char); +BENCHMARK_TEMPLATE(BM_unicode_text, char); +BENCHMARK_TEMPLATE(BM_cyrillic_text, char); +BENCHMARK_TEMPLATE(BM_japanese_text, char); +BENCHMARK_TEMPLATE(BM_emoji_text, char); + +BENCHMARK_TEMPLATE(BM_ascii_text, wchar_t); +BENCHMARK_TEMPLATE(BM_unicode_text, wchar_t); +BENCHMARK_TEMPLATE(BM_cyrillic_text, wchar_t); +BENCHMARK_TEMPLATE(BM_japanese_text, wchar_t); +BENCHMARK_TEMPLATE(BM_emoji_text, wchar_t); int main(int argc, char** argv) { benchmark::Initialize(&argc, argv); diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -222,6 +222,7 @@ __format/buffer.h __format/concepts.h __format/enable_insertable.h + __format/extended_grapheme_cluster_table.h __format/format_arg.h __format/format_arg_store.h __format/format_args.h @@ -240,6 +241,7 @@ __format/formatter_pointer.h __format/formatter_string.h __format/parser_std_format_spec.h + __format/unicode.h __functional/binary_function.h __functional/binary_negate.h __functional/bind.h diff --git a/libcxx/include/__format/extended_grapheme_cluster_table.h b/libcxx/include/__format/extended_grapheme_cluster_table.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__format/extended_grapheme_cluster_table.h @@ -0,0 +1,330 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_extended_grapheme_cluster_table.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use <https://www.unicode.org/copyright.html> +// for definitions of Unicode Inc.'s Data Files and Software. +// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of the Unicode data files and any associated documentation +// (the "Data Files") or Unicode software and any associated documentation +// (the "Software") to deal in the Data Files or Software +// without restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, and/or sell copies of +// the Data Files or Software, and to permit persons to whom the Data Files +// or Software are furnished to do so, provided that either +// (a) this copyright and permission notice appear with all copies +// of the Data Files or Software, or +// (b) this copyright and permission notice appear in associated +// Documentation. 
+// +// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT OF THIRD PARTY RIGHTS. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THE DATA FILES OR SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, +// use or other dealings in these Data Files or Software without prior +// written authorization of the copyright holder. + +#ifndef _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H +#define _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H + +#include <__algorithm/upper_bound.h> +#include <__config> +#include <__iterator/access.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +namespace __extended_grapheme_custer_property_boundary { + +enum class __property : uint8_t { + // Values generated from data files. + __CR, + __Control, + __Extend, + __Extended_Pictographic, + __L, + __LF, + __LV, + __LVT, + __Prepend, + __Regional_Indicator, + __SpacingMark, + __T, + __V, + __ZWJ, + + // The properties below aren't stored in the "database". + + // Text position properties. + __sot, + __eot, + + // The code unit has none of the above properties. + __none +}; + +/// The entry of the extended grapheme cluster boundary property table. 
+/// +/// The original MSVC STL code stores the data in two parallel arrays: +/// - One uint32_t with the lower bounds for the code points. +/// - One uint16_t with the size and the property. +/// This requires 6 bytes per entry. +/// +/// In libc++ this is stored in an array with 4 bytes per entry: +/// - A Unicode code point is restricted to 21-bit values. +/// - There are 14 properties requiring 4 bits. +/// - This leaves 7 bits to encode the range of an entry. +/// +/// MSVC STL has 12 bits for the range (4096 values) and libc++ 7 bits +/// (128 values). For most ranges 7 bits is sufficient. When a range has more +/// than 128 entries the range is split into multiple entries. +/// +/// Based on Unicode 12 the difference is +/// MSVC STL 1602 * 6 = 9612 bytes +/// libc++ 1643 * 4 = 6572 bytes +/// ========== - +/// saving 3040 bytes +/// +/// The measured overhead of the additional masking is about 3% in +/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp +struct __entry { + /// Code point of the lower bound of the range. + uint32_t __lower_bound : 21; + /// Offset to the upper bound of the range, 0 means a range of 1 code unit. + uint32_t __offset : 7; + /// The property of the range. + uint32_t __property : 4; +}; + +static_assert(sizeof(__entry) == sizeof(uint32_t)); + +/// The table with the extended grapheme cluster boundary properties. +/// +/// The contents are generated by a script. 
+inline constexpr uint32_t __entries[1453] = { + 0x00000091, 0x00005005, 0x00005811, 0x00006800, 0x00007111, 0x0003fa01, 0x00054803, 0x00056801, 0x00057003, + 0x001806f2, 0x00241862, 0x002c8ac2, 0x002df802, 0x002e0812, 0x002e2012, 0x002e3802, 0x00300058, 0x003080a2, + 0x0030e001, 0x00325942, 0x00338002, 0x0036b062, 0x0036e808, 0x0036f852, 0x00373812, 0x00375032, 0x00387808, + 0x00388802, 0x003981a2, 0x003d30a2, 0x003f5882, 0x003fe802, 0x0040b032, 0x0040d882, 0x00412822, 0x00414842, + 0x0042c822, 0x004698e2, 0x00471008, 0x004719f2, 0x0048180a, 0x0049d002, 0x0049d80a, 0x0049e002, 0x0049f02a, + 0x004a0872, 0x004a483a, 0x004a6802, 0x004a701a, 0x004a8862, 0x004b1012, 0x004c0802, 0x004c101a, 0x004de002, + 0x004df002, 0x004df81a, 0x004e0832, 0x004e381a, 0x004e581a, 0x004e6802, 0x004eb802, 0x004f1012, 0x004ff002, + 0x00500812, 0x0050180a, 0x0051e002, 0x0051f02a, 0x00520812, 0x00523812, 0x00525822, 0x00528802, 0x00538012, + 0x0053a802, 0x00540812, 0x0054180a, 0x0055e002, 0x0055f02a, 0x00560842, 0x00563812, 0x0056480a, 0x0056581a, + 0x00566802, 0x00571012, 0x0057d052, 0x00580802, 0x0058101a, 0x0059e002, 0x0059f012, 0x005a000a, 0x005a0832, + 0x005a381a, 0x005a581a, 0x005a6802, 0x005ab012, 0x005b1012, 0x005c1002, 0x005df002, 0x005df80a, 0x005e0002, + 0x005e081a, 0x005e302a, 0x005e502a, 0x005e6802, 0x005eb802, 0x00600002, 0x0060082a, 0x00602002, 0x0061f022, + 0x0062083a, 0x00623022, 0x00625032, 0x0062a812, 0x00631012, 0x00640802, 0x0064101a, 0x0065e002, 0x0065f00a, + 0x0065f802, 0x0066001a, 0x00661002, 0x0066181a, 0x00663002, 0x0066381a, 0x0066501a, 0x00666012, 0x0066a812, + 0x00671012, 0x00680012, 0x0068101a, 0x0069d812, 0x0069f002, 0x0069f81a, 0x006a0832, 0x006a302a, 0x006a502a, + 0x006a6802, 0x006a7008, 0x006ab802, 0x006b1012, 0x006c101a, 0x006e5002, 0x006e7802, 0x006e801a, 0x006e9022, + 0x006eb002, 0x006ec06a, 0x006ef802, 0x006f901a, 0x00718802, 0x0071980a, 0x0071a062, 0x00723872, 0x00758802, + 0x0075980a, 0x0075a082, 0x00764052, 0x0078c012, 0x0079a802, 0x0079b802, 
0x0079c802, 0x0079f01a, 0x007b88d2, + 0x007bf80a, 0x007c0042, 0x007c3012, 0x007c68a2, 0x007cca32, 0x007e3002, 0x00816832, 0x0081880a, 0x00819052, + 0x0081c812, 0x0081d81a, 0x0081e812, 0x0082b01a, 0x0082c012, 0x0082f022, 0x00838832, 0x00841002, 0x0084200a, + 0x00842812, 0x00846802, 0x0084e802, 0x008805f4, 0x008b047c, 0x008d457b, 0x009ae822, 0x00b89022, 0x00b99022, + 0x00ba9012, 0x00bb9012, 0x00bda012, 0x00bdb00a, 0x00bdb862, 0x00bdf07a, 0x00be3002, 0x00be381a, 0x00be48a2, + 0x00bee802, 0x00c05822, 0x00c07001, 0x00c42812, 0x00c54802, 0x00c90022, 0x00c9183a, 0x00c93812, 0x00c9482a, + 0x00c9801a, 0x00c99002, 0x00c9985a, 0x00c9c822, 0x00d0b812, 0x00d0c81a, 0x00d0d802, 0x00d2a80a, 0x00d2b002, + 0x00d2b80a, 0x00d2c062, 0x00d30002, 0x00d31002, 0x00d32872, 0x00d3685a, 0x00d39892, 0x00d3f802, 0x00d580e2, + 0x00d80032, 0x00d8200a, 0x00d9a062, 0x00d9d80a, 0x00d9e002, 0x00d9e84a, 0x00da1002, 0x00da181a, 0x00db5882, + 0x00dc0012, 0x00dc100a, 0x00dd080a, 0x00dd1032, 0x00dd301a, 0x00dd4012, 0x00dd500a, 0x00dd5822, 0x00df3002, + 0x00df380a, 0x00df4012, 0x00df502a, 0x00df6802, 0x00df700a, 0x00df7822, 0x00df901a, 0x00e1207a, 0x00e16072, + 0x00e1a01a, 0x00e1b012, 0x00e68022, 0x00e6a0c2, 0x00e7080a, 0x00e71062, 0x00e76802, 0x00e7a002, 0x00e7b80a, + 0x00e7c012, 0x00ee0392, 0x00efd842, 0x01005801, 0x01006002, 0x0100680d, 0x01007011, 0x01014061, 0x0101e003, + 0x01024803, 0x010300f1, 0x01068202, 0x01091003, 0x0109c803, 0x010ca053, 0x010d4813, 0x0118d013, 0x01194003, + 0x011c4003, 0x011e7803, 0x011f48a3, 0x011fc023, 0x01261003, 0x012d5013, 0x012db003, 0x012e0003, 0x012fd833, + 0x01300053, 0x013038b3, 0x0130a713, 0x01348753, 0x013840a3, 0x0138a003, 0x0138b003, 0x0138e803, 0x01390803, + 0x01394003, 0x01399813, 0x013a2003, 0x013a3803, 0x013a6003, 0x013a7003, 0x013a9823, 0x013ab803, 0x013b1843, + 0x013ca823, 0x013d0803, 0x013d8003, 0x013df803, 0x0149a013, 0x01582823, 0x0158d813, 0x015a8003, 0x015aa803, + 0x01677822, 0x016bf802, 0x016f01f2, 0x01815052, 0x01818003, 0x0181e803, 0x0184c812, 
0x0194b803, 0x0194c803, + 0x05337832, 0x0533a092, 0x0534f012, 0x05378012, 0x05401002, 0x05403002, 0x05405802, 0x0541181a, 0x05412812, + 0x0541380a, 0x0544001a, 0x0545a0fa, 0x05462012, 0x05470112, 0x0547f802, 0x05493072, 0x054a38a2, 0x054a901a, + 0x054b01c4, 0x054c0022, 0x054c180a, 0x054d9802, 0x054da01a, 0x054db032, 0x054dd01a, 0x054de012, 0x054df02a, + 0x054f2802, 0x05514852, 0x0551781a, 0x05518812, 0x0551981a, 0x0551a812, 0x05521802, 0x05526002, 0x0552680a, + 0x0553e002, 0x05558002, 0x05559022, 0x0555b812, 0x0555f012, 0x05560802, 0x0557580a, 0x05576012, 0x0557701a, + 0x0557a80a, 0x0557b002, 0x055f181a, 0x055f2802, 0x055f301a, 0x055f4002, 0x055f481a, 0x055f600a, 0x055f6802, + 0x05600006, 0x056009a7, 0x0560e006, 0x0560e9a7, 0x0561c006, 0x0561c9a7, 0x0562a006, 0x0562a9a7, 0x05638006, + 0x056389a7, 0x05646006, 0x056469a7, 0x05654006, 0x056549a7, 0x05662006, 0x056629a7, 0x05670006, 0x056709a7, + 0x0567e006, 0x0567e9a7, 0x0568c006, 0x0568c9a7, 0x0569a006, 0x0569a9a7, 0x056a8006, 0x056a89a7, 0x056b6006, + 0x056b69a7, 0x056c4006, 0x056c49a7, 0x056d2006, 0x056d29a7, 0x056e0006, 0x056e09a7, 0x056ee006, 0x056ee9a7, + 0x056fc006, 0x056fc9a7, 0x0570a006, 0x0570a9a7, 0x05718006, 0x057189a7, 0x05726006, 0x057269a7, 0x05734006, + 0x057349a7, 0x05742006, 0x057429a7, 0x05750006, 0x057509a7, 0x0575e006, 0x0575e9a7, 0x0576c006, 0x0576c9a7, + 0x0577a006, 0x0577a9a7, 0x05788006, 0x057889a7, 0x05796006, 0x057969a7, 0x057a4006, 0x057a49a7, 0x057b2006, + 0x057b29a7, 0x057c0006, 0x057c09a7, 0x057ce006, 0x057ce9a7, 0x057dc006, 0x057dc9a7, 0x057ea006, 0x057ea9a7, + 0x057f8006, 0x057f89a7, 0x05806006, 0x058069a7, 0x05814006, 0x058149a7, 0x05822006, 0x058229a7, 0x05830006, + 0x058309a7, 0x0583e006, 0x0583e9a7, 0x0584c006, 0x0584c9a7, 0x0585a006, 0x0585a9a7, 0x05868006, 0x058689a7, + 0x05876006, 0x058769a7, 0x05884006, 0x058849a7, 0x05892006, 0x058929a7, 0x058a0006, 0x058a09a7, 0x058ae006, + 0x058ae9a7, 0x058bc006, 0x058bc9a7, 0x058ca006, 0x058ca9a7, 0x058d8006, 0x058d89a7, 0x058e6006, 
0x058e69a7, + 0x058f4006, 0x058f49a7, 0x05902006, 0x059029a7, 0x05910006, 0x059109a7, 0x0591e006, 0x0591e9a7, 0x0592c006, + 0x0592c9a7, 0x0593a006, 0x0593a9a7, 0x05948006, 0x059489a7, 0x05956006, 0x059569a7, 0x05964006, 0x059649a7, + 0x05972006, 0x059729a7, 0x05980006, 0x059809a7, 0x0598e006, 0x0598e9a7, 0x0599c006, 0x0599c9a7, 0x059aa006, + 0x059aa9a7, 0x059b8006, 0x059b89a7, 0x059c6006, 0x059c69a7, 0x059d4006, 0x059d49a7, 0x059e2006, 0x059e29a7, + 0x059f0006, 0x059f09a7, 0x059fe006, 0x059fe9a7, 0x05a0c006, 0x05a0c9a7, 0x05a1a006, 0x05a1a9a7, 0x05a28006, + 0x05a289a7, 0x05a36006, 0x05a369a7, 0x05a44006, 0x05a449a7, 0x05a52006, 0x05a529a7, 0x05a60006, 0x05a609a7, + 0x05a6e006, 0x05a6e9a7, 0x05a7c006, 0x05a7c9a7, 0x05a8a006, 0x05a8a9a7, 0x05a98006, 0x05a989a7, 0x05aa6006, + 0x05aa69a7, 0x05ab4006, 0x05ab49a7, 0x05ac2006, 0x05ac29a7, 0x05ad0006, 0x05ad09a7, 0x05ade006, 0x05ade9a7, + 0x05aec006, 0x05aec9a7, 0x05afa006, 0x05afa9a7, 0x05b08006, 0x05b089a7, 0x05b16006, 0x05b169a7, 0x05b24006, + 0x05b249a7, 0x05b32006, 0x05b329a7, 0x05b40006, 0x05b409a7, 0x05b4e006, 0x05b4e9a7, 0x05b5c006, 0x05b5c9a7, + 0x05b6a006, 0x05b6a9a7, 0x05b78006, 0x05b789a7, 0x05b86006, 0x05b869a7, 0x05b94006, 0x05b949a7, 0x05ba2006, + 0x05ba29a7, 0x05bb0006, 0x05bb09a7, 0x05bbe006, 0x05bbe9a7, 0x05bcc006, 0x05bcc9a7, 0x05bda006, 0x05bda9a7, + 0x05be8006, 0x05be89a7, 0x05bf6006, 0x05bf69a7, 0x05c04006, 0x05c049a7, 0x05c12006, 0x05c129a7, 0x05c20006, + 0x05c209a7, 0x05c2e006, 0x05c2e9a7, 0x05c3c006, 0x05c3c9a7, 0x05c4a006, 0x05c4a9a7, 0x05c58006, 0x05c589a7, + 0x05c66006, 0x05c669a7, 0x05c74006, 0x05c749a7, 0x05c82006, 0x05c829a7, 0x05c90006, 0x05c909a7, 0x05c9e006, + 0x05c9e9a7, 0x05cac006, 0x05cac9a7, 0x05cba006, 0x05cba9a7, 0x05cc8006, 0x05cc89a7, 0x05cd6006, 0x05cd69a7, + 0x05ce4006, 0x05ce49a7, 0x05cf2006, 0x05cf29a7, 0x05d00006, 0x05d009a7, 0x05d0e006, 0x05d0e9a7, 0x05d1c006, + 0x05d1c9a7, 0x05d2a006, 0x05d2a9a7, 0x05d38006, 0x05d389a7, 0x05d46006, 0x05d469a7, 0x05d54006, 0x05d549a7, + 
0x05d62006, 0x05d629a7, 0x05d70006, 0x05d709a7, 0x05d7e006, 0x05d7e9a7, 0x05d8c006, 0x05d8c9a7, 0x05d9a006, + 0x05d9a9a7, 0x05da8006, 0x05da89a7, 0x05db6006, 0x05db69a7, 0x05dc4006, 0x05dc49a7, 0x05dd2006, 0x05dd29a7, + 0x05de0006, 0x05de09a7, 0x05dee006, 0x05dee9a7, 0x05dfc006, 0x05dfc9a7, 0x05e0a006, 0x05e0a9a7, 0x05e18006, + 0x05e189a7, 0x05e26006, 0x05e269a7, 0x05e34006, 0x05e349a7, 0x05e42006, 0x05e429a7, 0x05e50006, 0x05e509a7, + 0x05e5e006, 0x05e5e9a7, 0x05e6c006, 0x05e6c9a7, 0x05e7a006, 0x05e7a9a7, 0x05e88006, 0x05e889a7, 0x05e96006, + 0x05e969a7, 0x05ea4006, 0x05ea49a7, 0x05eb2006, 0x05eb29a7, 0x05ec0006, 0x05ec09a7, 0x05ece006, 0x05ece9a7, + 0x05edc006, 0x05edc9a7, 0x05eea006, 0x05eea9a7, 0x05ef8006, 0x05ef89a7, 0x05f06006, 0x05f069a7, 0x05f14006, + 0x05f149a7, 0x05f22006, 0x05f229a7, 0x05f30006, 0x05f309a7, 0x05f3e006, 0x05f3e9a7, 0x05f4c006, 0x05f4c9a7, + 0x05f5a006, 0x05f5a9a7, 0x05f68006, 0x05f689a7, 0x05f76006, 0x05f769a7, 0x05f84006, 0x05f849a7, 0x05f92006, + 0x05f929a7, 0x05fa0006, 0x05fa09a7, 0x05fae006, 0x05fae9a7, 0x05fbc006, 0x05fbc9a7, 0x05fca006, 0x05fca9a7, + 0x05fd8006, 0x05fd89a7, 0x05fe6006, 0x05fe69a7, 0x05ff4006, 0x05ff49a7, 0x06002006, 0x060029a7, 0x06010006, + 0x060109a7, 0x0601e006, 0x0601e9a7, 0x0602c006, 0x0602c9a7, 0x0603a006, 0x0603a9a7, 0x06048006, 0x060489a7, + 0x06056006, 0x060569a7, 0x06064006, 0x060649a7, 0x06072006, 0x060729a7, 0x06080006, 0x060809a7, 0x0608e006, + 0x0608e9a7, 0x0609c006, 0x0609c9a7, 0x060aa006, 0x060aa9a7, 0x060b8006, 0x060b89a7, 0x060c6006, 0x060c69a7, + 0x060d4006, 0x060d49a7, 0x060e2006, 0x060e29a7, 0x060f0006, 0x060f09a7, 0x060fe006, 0x060fe9a7, 0x0610c006, + 0x0610c9a7, 0x0611a006, 0x0611a9a7, 0x06128006, 0x061289a7, 0x06136006, 0x061369a7, 0x06144006, 0x061449a7, + 0x06152006, 0x061529a7, 0x06160006, 0x061609a7, 0x0616e006, 0x0616e9a7, 0x0617c006, 0x0617c9a7, 0x0618a006, + 0x0618a9a7, 0x06198006, 0x061989a7, 0x061a6006, 0x061a69a7, 0x061b4006, 0x061b49a7, 0x061c2006, 0x061c29a7, + 0x061d0006, 
0x061d09a7, 0x061de006, 0x061de9a7, 0x061ec006, 0x061ec9a7, 0x061fa006, 0x061fa9a7, 0x06208006, + 0x062089a7, 0x06216006, 0x062169a7, 0x06224006, 0x062249a7, 0x06232006, 0x062329a7, 0x06240006, 0x062409a7, + 0x0624e006, 0x0624e9a7, 0x0625c006, 0x0625c9a7, 0x0626a006, 0x0626a9a7, 0x06278006, 0x062789a7, 0x06286006, + 0x062869a7, 0x06294006, 0x062949a7, 0x062a2006, 0x062a29a7, 0x062b0006, 0x062b09a7, 0x062be006, 0x062be9a7, + 0x062cc006, 0x062cc9a7, 0x062da006, 0x062da9a7, 0x062e8006, 0x062e89a7, 0x062f6006, 0x062f69a7, 0x06304006, + 0x063049a7, 0x06312006, 0x063129a7, 0x06320006, 0x063209a7, 0x0632e006, 0x0632e9a7, 0x0633c006, 0x0633c9a7, + 0x0634a006, 0x0634a9a7, 0x06358006, 0x063589a7, 0x06366006, 0x063669a7, 0x06374006, 0x063749a7, 0x06382006, + 0x063829a7, 0x06390006, 0x063909a7, 0x0639e006, 0x0639e9a7, 0x063ac006, 0x063ac9a7, 0x063ba006, 0x063ba9a7, + 0x063c8006, 0x063c89a7, 0x063d6006, 0x063d69a7, 0x063e4006, 0x063e49a7, 0x063f2006, 0x063f29a7, 0x06400006, + 0x064009a7, 0x0640e006, 0x0640e9a7, 0x0641c006, 0x0641c9a7, 0x0642a006, 0x0642a9a7, 0x06438006, 0x064389a7, + 0x06446006, 0x064469a7, 0x06454006, 0x064549a7, 0x06462006, 0x064629a7, 0x06470006, 0x064709a7, 0x0647e006, + 0x0647e9a7, 0x0648c006, 0x0648c9a7, 0x0649a006, 0x0649a9a7, 0x064a8006, 0x064a89a7, 0x064b6006, 0x064b69a7, + 0x064c4006, 0x064c49a7, 0x064d2006, 0x064d29a7, 0x064e0006, 0x064e09a7, 0x064ee006, 0x064ee9a7, 0x064fc006, + 0x064fc9a7, 0x0650a006, 0x0650a9a7, 0x06518006, 0x065189a7, 0x06526006, 0x065269a7, 0x06534006, 0x065349a7, + 0x06542006, 0x065429a7, 0x06550006, 0x065509a7, 0x0655e006, 0x0655e9a7, 0x0656c006, 0x0656c9a7, 0x0657a006, + 0x0657a9a7, 0x06588006, 0x065889a7, 0x06596006, 0x065969a7, 0x065a4006, 0x065a49a7, 0x065b2006, 0x065b29a7, + 0x065c0006, 0x065c09a7, 0x065ce006, 0x065ce9a7, 0x065dc006, 0x065dc9a7, 0x065ea006, 0x065ea9a7, 0x065f8006, + 0x065f89a7, 0x06606006, 0x066069a7, 0x06614006, 0x066149a7, 0x06622006, 0x066229a7, 0x06630006, 0x066309a7, + 0x0663e006, 0x0663e9a7, 
0x0664c006, 0x0664c9a7, 0x0665a006, 0x0665a9a7, 0x06668006, 0x066689a7, 0x06676006, + 0x066769a7, 0x06684006, 0x066849a7, 0x06692006, 0x066929a7, 0x066a0006, 0x066a09a7, 0x066ae006, 0x066ae9a7, + 0x066bc006, 0x066bc9a7, 0x066ca006, 0x066ca9a7, 0x066d8006, 0x066d89a7, 0x066e6006, 0x066e69a7, 0x066f4006, + 0x066f49a7, 0x06702006, 0x067029a7, 0x06710006, 0x067109a7, 0x0671e006, 0x0671e9a7, 0x0672c006, 0x0672c9a7, + 0x0673a006, 0x0673a9a7, 0x06748006, 0x067489a7, 0x06756006, 0x067569a7, 0x06764006, 0x067649a7, 0x06772006, + 0x067729a7, 0x06780006, 0x067809a7, 0x0678e006, 0x0678e9a7, 0x0679c006, 0x0679c9a7, 0x067aa006, 0x067aa9a7, + 0x067b8006, 0x067b89a7, 0x067c6006, 0x067c69a7, 0x067d4006, 0x067d49a7, 0x067e2006, 0x067e29a7, 0x067f0006, + 0x067f09a7, 0x067fe006, 0x067fe9a7, 0x0680c006, 0x0680c9a7, 0x0681a006, 0x0681a9a7, 0x06828006, 0x068289a7, + 0x06836006, 0x068369a7, 0x06844006, 0x068449a7, 0x06852006, 0x068529a7, 0x06860006, 0x068609a7, 0x0686e006, + 0x0686e9a7, 0x0687c006, 0x0687c9a7, 0x0688a006, 0x0688a9a7, 0x06898006, 0x068989a7, 0x068a6006, 0x068a69a7, + 0x068b4006, 0x068b49a7, 0x068c2006, 0x068c29a7, 0x068d0006, 0x068d09a7, 0x068de006, 0x068de9a7, 0x068ec006, + 0x068ec9a7, 0x068fa006, 0x068fa9a7, 0x06908006, 0x069089a7, 0x06916006, 0x069169a7, 0x06924006, 0x069249a7, + 0x06932006, 0x069329a7, 0x06940006, 0x069409a7, 0x0694e006, 0x0694e9a7, 0x0695c006, 0x0695c9a7, 0x0696a006, + 0x0696a9a7, 0x06978006, 0x069789a7, 0x06986006, 0x069869a7, 0x06994006, 0x069949a7, 0x069a2006, 0x069a29a7, + 0x069b0006, 0x069b09a7, 0x069be006, 0x069be9a7, 0x069cc006, 0x069cc9a7, 0x069da006, 0x069da9a7, 0x069e8006, + 0x069e89a7, 0x069f6006, 0x069f69a7, 0x06a04006, 0x06a049a7, 0x06a12006, 0x06a129a7, 0x06a20006, 0x06a209a7, + 0x06a2e006, 0x06a2e9a7, 0x06a3c006, 0x06a3c9a7, 0x06a4a006, 0x06a4a9a7, 0x06a58006, 0x06a589a7, 0x06a66006, + 0x06a669a7, 0x06a74006, 0x06a749a7, 0x06a82006, 0x06a829a7, 0x06a90006, 0x06a909a7, 0x06a9e006, 0x06a9e9a7, + 0x06aac006, 0x06aac9a7, 0x06aba006, 
0x06aba9a7, 0x06ac8006, 0x06ac89a7, 0x06ad6006, 0x06ad69a7, 0x06ae4006, + 0x06ae49a7, 0x06af2006, 0x06af29a7, 0x06b00006, 0x06b009a7, 0x06b0e006, 0x06b0e9a7, 0x06b1c006, 0x06b1c9a7, + 0x06b2a006, 0x06b2a9a7, 0x06b38006, 0x06b389a7, 0x06b46006, 0x06b469a7, 0x06b54006, 0x06b549a7, 0x06b62006, + 0x06b629a7, 0x06b70006, 0x06b709a7, 0x06b7e006, 0x06b7e9a7, 0x06b8c006, 0x06b8c9a7, 0x06b9a006, 0x06b9a9a7, + 0x06ba8006, 0x06ba89a7, 0x06bb6006, 0x06bb69a7, 0x06bc4006, 0x06bc49a7, 0x06bd816c, 0x06be5b0b, 0x07d8f002, + 0x07f000f2, 0x07f100f2, 0x07f7f801, 0x07fcf012, 0x07ff80b1, 0x080fe802, 0x08170002, 0x081bb042, 0x08500822, + 0x08502812, 0x08506032, 0x0851c022, 0x0851f802, 0x08572812, 0x08692032, 0x087a30a2, 0x0880000a, 0x08800802, + 0x0880100a, 0x0881c0e2, 0x0883f822, 0x0884100a, 0x0885802a, 0x08859832, 0x0885b81a, 0x0885c812, 0x0885e808, + 0x08866808, 0x08880022, 0x08893842, 0x0889600a, 0x08896872, 0x088a281a, 0x088b9802, 0x088c0012, 0x088c100a, + 0x088d982a, 0x088db082, 0x088df81a, 0x088e1018, 0x088e4832, 0x0891602a, 0x08917822, 0x0891901a, 0x0891a002, + 0x0891a80a, 0x0891b012, 0x0891f002, 0x0896f802, 0x0897002a, 0x08971872, 0x08980012, 0x0898101a, 0x0899d812, + 0x0899f002, 0x0899f80a, 0x089a0002, 0x089a083a, 0x089a381a, 0x089a582a, 0x089ab802, 0x089b101a, 0x089b3062, + 0x089b8042, 0x08a1a82a, 0x08a1c072, 0x08a2001a, 0x08a21022, 0x08a2280a, 0x08a23002, 0x08a2f002, 0x08a58002, + 0x08a5881a, 0x08a59852, 0x08a5c80a, 0x08a5d002, 0x08a5d81a, 0x08a5e802, 0x08a5f00a, 0x08a5f812, 0x08a6080a, + 0x08a61012, 0x08ad7802, 0x08ad801a, 0x08ad9032, 0x08adc03a, 0x08ade012, 0x08adf00a, 0x08adf812, 0x08aee012, + 0x08b1802a, 0x08b19872, 0x08b1d81a, 0x08b1e802, 0x08b1f00a, 0x08b1f812, 0x08b55802, 0x08b5600a, 0x08b56802, + 0x08b5701a, 0x08b58052, 0x08b5b00a, 0x08b5b802, 0x08b8e822, 0x08b9001a, 0x08b91032, 0x08b9300a, 0x08b93842, + 0x08c1602a, 0x08c17882, 0x08c1c00a, 0x08c1c812, 0x08ce882a, 0x08cea032, 0x08ced012, 0x08cee03a, 0x08cf0002, + 0x08cf200a, 0x08d00892, 0x08d19852, 0x08d1c80a, 
0x08d1d008, 0x08d1d832, 0x08d23802, 0x08d28852, 0x08d2b81a, + 0x08d2c822, 0x08d42058, 0x08d450c2, 0x08d4b80a, 0x08d4c012, 0x08e1780a, 0x08e18062, 0x08e1c052, 0x08e1f00a, + 0x08e1f802, 0x08e49152, 0x08e5480a, 0x08e55062, 0x08e5880a, 0x08e59012, 0x08e5a00a, 0x08e5a812, 0x08e98852, + 0x08e9d002, 0x08e9e012, 0x08e9f862, 0x08ea3008, 0x08ea3802, 0x08ec504a, 0x08ec8012, 0x08ec981a, 0x08eca802, + 0x08ecb00a, 0x08ecb802, 0x08f79812, 0x08f7a81a, 0x09a18081, 0x0b578042, 0x0b598062, 0x0b7a7802, 0x0b7a8b6a, + 0x0b7c7832, 0x0de4e812, 0x0de50031, 0x0e8b2802, 0x0e8b300a, 0x0e8b3822, 0x0e8b680a, 0x0e8b7042, 0x0e8b9871, + 0x0e8bd872, 0x0e8c2862, 0x0e8d5032, 0x0e921022, 0x0ed00362, 0x0ed1db12, 0x0ed3a802, 0x0ed42002, 0x0ed4d842, + 0x0ed508e2, 0x0f000062, 0x0f004102, 0x0f00d862, 0x0f011812, 0x0f013042, 0x0f098062, 0x0f176032, 0x0f468062, + 0x0f4a2062, 0x0f8007f3, 0x0f8407f3, 0x0f886823, 0x0f897803, 0x0f8b6053, 0x0f8bf013, 0x0f8c7003, 0x0f8c8893, + 0x0f8d6b83, 0x0f8f3199, 0x0f9008e3, 0x0f90d003, 0x0f917803, 0x0f919083, 0x0f91e033, 0x0f924ff3, 0x0f964ff3, + 0x0f9a4ff3, 0x0f9e4b13, 0x0f9fd842, 0x0fa007f3, 0x0fa407f3, 0x0fa803d3, 0x0faa37f3, 0x0fae37f3, 0x0fb23093, + 0x0fb407f3, 0x0fbba0b3, 0x0fbeaaa3, 0x0fc06033, 0x0fc24073, 0x0fc2d053, 0x0fc44073, 0x0fc57513, 0x0fc862e3, + 0x0fc9e093, 0x0fca3ff3, 0x0fce3ff3, 0x0fd23ff3, 0x0fd63ff3, 0x0fda3ff3, 0x0fde3ff3, 0x0fe23ff3, 0x0fe63ff3, + 0x0fea3ff3, 0x0fee3ff3, 0x0ff23ff3, 0x0ff63ff3, 0x0ffa3ff3, 0x0ffe3b63, 0x700001f1, 0x700105f2, 0x700407f1, + 0x700807f2, 0x700c06f2, 0x700f87f1, 0x701387f1, 0x701787f1, 0x701b87f1, 0x701f87f1, 0x702387f1, 0x702787f1, + 0x702b87f1, 0x702f87f1, 0x703387f1, 0x703787f1, 0x703b87f1, 0x703f87f1, 0x704387f1, 0x704787f1, 0x704b87f1, + 0x704f87f1, 0x705387f1, 0x705787f1, 0x705b87f1, 0x705f87f1, 0x706387f1, 0x706787f1, 0x706b87f1, 0x706f87f1, + 0x707387f1, 0x707787f1, 0x707b87f1, 0x707f80f1}; + +/// Returns the extended grapheme cluster bondary property of a code point. 
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { + // Entry layout: bits [11, 32) first code point of a range, bits [4, 11) offset of its last code point, bits [0, 4) property. TODO FMT use ranges + ptrdiff_t __i = std::upper_bound(__entries, std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries; + if (__i == 0) + return __property::__none; + + --__i; + uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 4) & 0x7f); + if (__code_point <= __upper_bound) + return static_cast<__property>(__entries[__i] & 0xf); + + return __property::__none; +} + +} // namespace __extended_grapheme_custer_property_boundary + +#endif //_LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -17,6 +17,7 @@ #include <__format/format_arg.h> #include <__format/format_error.h> #include <__format/format_string.h> +#include <__format/unicode.h> #include <__variant/monostate.h> #include #include @@ -1014,15 +1015,15 @@ /** Helper concept for an UTF-8 character type. */ template -concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>; +concept __utf8_character = same_as<_CharT, char>; /** Helper concept for an UTF-16 character type. */ template -concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>; +concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2); /** Helper concept for an UTF-32 character type. */ template -concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>; +concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4); /** Helper concept for an UTF-16 or UTF-32 character type. 
*/ template @@ -1089,25 +1090,10 @@ /** * Estimate the column width for the UTF-8 sequence using the fast algorithm. */ -template <__utf8_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__estimate_column_width_fast(const _CharT* __first, - const _CharT* __last) noexcept { - return _VSTD::find_if(__first, __last, - [](unsigned char __c) { return __c & 0x80; }); -} - -/** - * @overload - * - * The implementation for UTF-16/32. - */ -template <__utf16_or_32_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* -__estimate_column_width_fast(const _CharT* __first, - const _CharT* __last) noexcept { - return _VSTD::find_if(__first, __last, - [](uint32_t __c) { return __c >= 0x1100; }); +template +_LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __estimate_column_width_fast(const _CharT* __first, + const _CharT* __last) noexcept { + return _VSTD::find_if(__first, __last, [](char32_t __c) { return __c >= 0x80; }); } template @@ -1151,134 +1137,28 @@ * @param __maximum The maximum number of output columns. The returned number * of estimated output columns will not exceed this value. */ -template <__utf8_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width(const _CharT* __first, const _CharT* __last, - size_t __maximum) noexcept { - size_t __result = 0; - - while (__first != __last) { - // Based on the number of leading 1 bits the number of code units in the - // code point can be determined. See - // https://en.wikipedia.org/wiki/UTF-8#Encoding - switch (_VSTD::countl_one(static_cast(*__first))) { - case 0: // 1-code unit encoding: all 1 column - ++__result; - ++__first; - break; - - case 2: // 2-code unit encoding: all 1 column - // Malformed Unicode. - if (__last - __first < 2) [[unlikely]] - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - __first += 2; - ++__result; - break; - - case 3: // 3-code unit encoding: either 1 or 2 columns - // Malformed Unicode. 
- if (__last - __first < 3) [[unlikely]] - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - { - uint32_t __c = static_cast(*__first++) & 0x0f; - __c <<= 6; - __c |= static_cast(*__first++) & 0x3f; - __c <<= 6; - __c |= static_cast(*__first++) & 0x3f; - __result += __column_width_3(__c); - if (__result > __maximum) - return {__result - 2, __first - 3}; - } - break; - case 4: // 4-code unit encoding: either 1 or 2 columns - // Malformed Unicode. - if (__last - __first < 4) [[unlikely]] - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - { - uint32_t __c = static_cast(*__first++) & 0x07; - __c <<= 6; - __c |= static_cast(*__first++) & 0x3f; - __c <<= 6; - __c |= static_cast(*__first++) & 0x3f; - __c <<= 6; - __c |= static_cast(*__first++) & 0x3f; - __result += __column_width_4(__c); - if (__result > __maximum) - return {__result - 2, __first - 4}; - } - break; - default: - // Malformed Unicode. - return __estimate_column_width_malformed(__first, __last, __maximum, - __result); - } - - if (__result >= __maximum) - return {__result, __first}; - } - return {__result, __first}; -} - -template <__utf16_character _CharT> -_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width(const _CharT* __first, const _CharT* __last, - size_t __maximum) noexcept { - size_t __result = 0; - - while (__first != __last) { - uint32_t __c = *__first; - // Is the code unit part of a surrogate pair? See - // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF - if (__c >= 0xd800 && __c <= 0xDfff) { - // Malformed Unicode. 
- if (__last - __first < 2) [[unlikely]] - return {__result + 1, __first + 1}; - - __c -= 0xd800; - __c <<= 10; - __c += (*(__first + 1) - 0xdc00); - __c += 0x10000; - - __result += __column_width_4(__c); - if (__result > __maximum) - return {__result - 2, __first}; - __first += 2; - } else { - __result += __column_width_3(__c); - if (__result > __maximum) - return {__result - 2, __first}; - ++__first; - } - - if (__result >= __maximum) - return {__result, __first}; - } - - return {__result, __first}; -} - -template <__utf32_character _CharT> +template _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> -__estimate_column_width(const _CharT* __first, const _CharT* __last, - size_t __maximum) noexcept { - size_t __result = 0; - - while (__first != __last) { - uint32_t __c = *__first; - __result += __column_width(__c); - - if (__result > __maximum) - return {__result - 2, __first}; - - ++__first; - if (__result >= __maximum) - return {__result, __first}; +__estimate_column_width(const _CharT* __first, const _CharT* __last, size_t __maximum) noexcept { + __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; + + __column_width_result<_CharT> __result{0, __first}; + while (__result.__ptr != __last && __result.__width != __maximum) { + typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); + int __width = __column_width(__cluster.__code_point_); + + // When the next entry would exceed the maximum width the previous width + // might be returned. For example when a width of 100 is requested the + // returned width might be 99, since the next code point has an estimated + // column width of 2. 
+ if (__result.__width + __width > __maximum) + return __result; + + __result.__width += __width; + __result.__ptr = __cluster.__last_; } - return {__result, __first}; + return __result; } } // namespace __detail @@ -1314,6 +1194,13 @@ const _CharT* __pos = __detail::__estimate_column_width_fast(__first, __limit); + // There's a subtle issue; when __pos is non-ASCII the last code unit may be + // part of an extended grapheme cluster. For example an ASCII letter and a + // COMBINING ACUTE ACCENT. In that case adjust the returned result and + // reevaluate the last code unit in a Unicode context. + if (__pos != __first && __pos != __last && static_cast(*__pos) >= 0x80) + --__pos; + if (__pos == __limit) return {__limit, __size, __size < __width}; diff --git a/libcxx/include/__format/unicode.h b/libcxx/include/__format/unicode.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__format/unicode.h @@ -0,0 +1,325 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FORMAT_UNICODE_H +#define _LIBCPP___FORMAT_UNICODE_H + +#include <__assert> +#include <__config> +#include <__format/extended_grapheme_cluster_table.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +# ifndef _LIBCPP_HAS_NO_UNICODE + +/// Implements the grapheme cluster boundary rules +/// +/// These rules are used to implement format's width estimation as stated in +/// [format.string.std]/11 +/// +/// The Standard refers to UAX \#29 for Unicode 12.0.0 +/// https://www.unicode.org/reports/tr29/tr29-35.html#Grapheme_Cluster_Boundary_Rules +/// +/// The data tables used are +/// http://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakProperty.txt +/// http://www.unicode.org/Public/emoji/12.0/emoji-data.txt +/// http://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakTest.txt (for testing only) + +namespace __unicode { + +inline constexpr char32_t __replacement_character = U'\ufffd'; + +/// Helper class to extract a code point from a Unicode character range. +/// +/// The stored range is a view. There are multiple specializations for different +/// character types. +template +class __code_point_view; + +/// UTF-8 specialization. 
+template <> +class __code_point_view { +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const char* __first, const char* __last) + : __first_(__first), __last_(__last) {} + + _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } + _LIBCPP_HIDE_FROM_ABI constexpr const char* __position() const noexcept { return __first_; } + + _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { + _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); + + // Based on the number of leading 1 bits the number of code units in the + // code point can be determined. See + // https://en.wikipedia.org/wiki/UTF-8#Encoding + switch (_VSTD::countl_one(static_cast(*__first_))) { + case 0: + return *__first_++; + + case 2: + if (__last_ - __first_ < 2) [[unlikely]] { + __first_ = __last_; + return __replacement_character; + } else { + char32_t __value = static_cast(*__first_++) & 0x1f; + __value <<= 6; + __value |= static_cast(*__first_++) & 0x3f; + return __value; + } + + case 3: + if (__last_ - __first_ < 3) [[unlikely]] { + __first_ = __last_; + __first_ = __last_; // NOTE(review): repeats the previous statement; harmless but redundant + return __replacement_character; + } else { + char32_t __value = static_cast(*__first_++) & 0x0f; + __value <<= 6; + __value |= static_cast(*__first_++) & 0x3f; + __value <<= 6; + __value |= static_cast(*__first_++) & 0x3f; + return __value; + } + + case 4: + if (__last_ - __first_ < 4) [[unlikely]] { + __first_ = __last_; + return __replacement_character; + } else { + char32_t __value = static_cast(*__first_++) & 0x07; + __value <<= 6; + __value |= static_cast(*__first_++) & 0x3f; + __value <<= 6; + __value |= static_cast(*__first_++) & 0x3f; + __value <<= 6; + __value |= static_cast(*__first_++) & 0x3f; + return __value; + } + } + // An invalid number of leading ones can be garbage or a code unit in the + // middle of a code point. By consuming one code unit the parser may get + // "in sync" after a few code units. 
+ ++__first_; + return __replacement_character; + } + +private: + const char* __first_; + const char* __last_; +}; + +/// wchar_t specialization; the encoding depends on the size of wchar_t +/// - 2 UTF-16 (for example Windows and AIX) +/// - 4 UTF-32 (for example Linux) +template <> +class __code_point_view { +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(const wchar_t* __first, const wchar_t* __last) + : __first_(__first), __last_(__last) {} + + _LIBCPP_HIDE_FROM_ABI constexpr const wchar_t* __position() const noexcept { return __first_; } + _LIBCPP_HIDE_FROM_ABI constexpr bool __at_end() const noexcept { return __first_ == __last_; } + + _LIBCPP_HIDE_FROM_ABI constexpr char32_t __consume() noexcept { + _LIBCPP_ASSERT(__first_ != __last_, "can't move beyond the end of input"); + + if constexpr (sizeof(wchar_t) == 2) { + char32_t __result = *__first_++; + // Is the code unit part of a surrogate pair? See + // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF + if (__result >= 0xd800 && __result <= 0xDfff) { + // Malformed Unicode. NOTE(review): only truncated input is detected; the second unit is not checked to be a low surrogate (0xdc00-0xdfff) - confirm. 
+ if (__first_ == __last_) [[unlikely]] + return __replacement_character; + + __result -= 0xd800; + __result <<= 10; + __result += *__first_++ - 0xdc00; + __result += 0x10000; + } + return __result; + + } else if constexpr (sizeof(wchar_t) == 4) + return *__first_++; + + // unreachable + } + +private: + const wchar_t* __first_; + const wchar_t* __last_; +}; + +_LIBCPP_HIDE_FROM_ABI +constexpr bool __at_extended_grapheme_cluster_break( + bool& __RI_break_allowed, bool __has_extened_pictographic, + __extended_grapheme_custer_property_boundary::__property __prev, + __extended_grapheme_custer_property_boundary::__property __next_prop_) { // XXX next + using __extended_grapheme_custer_property_boundary::__property; + + __has_extened_pictographic |= __prev == __property::__Extended_Pictographic; // NOTE(review): updates the by-value copy only; __get_break applies the same update before calling, so this looks redundant + + // https://www.unicode.org/reports/tr29/tr29-39.html#Grapheme_Cluster_Boundary_Rules + + // *** Break at the start and end of text, unless the text is empty. *** + + _LIBCPP_ASSERT(__prev != __property::__sot, "should be handled in the constructor"); // GB1 + _LIBCPP_ASSERT(__prev != __property::__eot, "should be handled by our caller"); // GB2 + + // *** Do not break between a CR and LF. Otherwise, break before and after controls. *** + + if (__prev == __property::__CR && __next_prop_ == __property::__LF) // GB3 + return false; + + if (__prev == __property::__Control || __prev == __property::__CR || __prev == __property::__LF) // GB4 + return true; + + if (__next_prop_ == __property::__Control || __next_prop_ == __property::__CR || + __next_prop_ == __property::__LF) // GB5 + return true; + + // *** Do not break Hangul syllable sequences. 
*** + if (__prev == __property::__L && (__next_prop_ == __property::__L || __next_prop_ == __property::__V || + __next_prop_ == __property::__LV || __next_prop_ == __property::__LVT)) // GB6 + return false; + + if ((__prev == __property::__LV || __prev == __property::__V) && + (__next_prop_ == __property::__V || __next_prop_ == __property::__T)) // GB7 + return false; + + if ((__prev == __property::__LVT || __prev == __property::__T) && __next_prop_ == __property::__T) // GB8 + return false; + + // *** Do not break before extending characters or ZWJ. *** + if (__next_prop_ == __property::__Extend || __next_prop_ == __property::__ZWJ) + return false; // GB9 + + // *** Do not break before SpacingMarks, or after Prepend characters. *** + if (__next_prop_ == __property::__SpacingMark) // GB9a + return false; + + if (__prev == __property::__Prepend) // GB9b + return false; + + // *** Do not break within emoji modifier sequences or emoji zwj sequences. *** + + // GB11 \p{Extended_Pictographic} Extend* ZWJ x \p{Extended_Pictographic} + // + // Note that several parts of this rule are matched by GB9: Any x (Extend | ZWJ) + // - \p{Extended_Pictographic} x Extend + // - Extend x Extend + // - \p{Extended_Pictographic} x ZWJ + // - Extend x ZWJ + // + // So the only case left to test is + // - \p{Extended_Pictographic}' x ZWJ x \p{Extended_Pictographic} + // where \p{Extended_Pictographic}' is stored in __has_extened_pictographic + // + // TODO FMT add test for ZWJ \p{Extended_Pictographic} + if (__has_extened_pictographic && __prev == __property::__ZWJ && __next_prop_ == __property::__Extended_Pictographic) + return false; + + // *** Do not break within emoji flag sequences *** + + // That is, do not break between regional indicator (RI) symbols if there + // is an odd number of RI characters before the break point. 
+ + if (__prev == __property::__Regional_Indicator && __next_prop_ == __property::__Regional_Indicator) { // GB12 + GB13 + __RI_break_allowed = !__RI_break_allowed; + if (__RI_break_allowed) + return true; + + return false; + } + + // *** Otherwise, break everywhere. *** + return true; // GB999 +} + +/// Helper class to extract an extended grapheme cluster from a Unicode character range. +/// +/// This class is used to determine the column width of an extended grapheme +/// cluster. In order to do that only the first code point is evaluated. +/// Therefore only this code point is extracted. +template +class __extended_grapheme_cluster_view { +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(const _CharT* __first, const _CharT* __last) + : __code_point_view_(__first, __last), __next_code_point_(__code_point_view_.__consume()), + __next_prop_(__extended_grapheme_custer_property_boundary::__get_property(__next_code_point_)) {} // NOTE(review): consumes one code point unconditionally, so [__first, __last) must not be empty - confirm callers guarantee this + + struct __cluster { + /// The first code point of the extended grapheme cluster. + /// + /// The first code point is used to estimate the width of the extended + /// grapheme cluster. + char32_t __code_point_; + + /// Points one beyond the last code unit in the extended grapheme cluster. + /// + /// It's expected the caller has the start position and thus can determine + /// the code unit range of the extended grapheme cluster. 
+ const _CharT* __last_; + }; + + _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() { + _LIBCPP_ASSERT(__next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot, + "can't move beyond the end of input"); + char32_t __code_point = __next_code_point_; + if (!__code_point_view_.__at_end()) + return {__code_point, __get_break()}; + + __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot; + return {__code_point, __code_point_view_.__position()}; + } + +private: + __code_point_view<_CharT> __code_point_view_; + + char32_t __next_code_point_; + __extended_grapheme_custer_property_boundary::__property __next_prop_; + + _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __get_break() { + bool __RI_break_allowed = true; + bool __has_extened_pictographic = false; + while (true) { + const _CharT* __result = __code_point_view_.__position(); + __extended_grapheme_custer_property_boundary::__property __prev = __next_prop_; + if (__code_point_view_.__at_end()) { + __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot; + return __result; + } + __next_code_point_ = __code_point_view_.__consume(); + __next_prop_ = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point_); + + __has_extened_pictographic |= + __prev == __extended_grapheme_custer_property_boundary::__property::__Extended_Pictographic; + + if (__at_extended_grapheme_cluster_break(__RI_break_allowed, __has_extened_pictographic, __prev, __next_prop_)) + return __result; + } + } +}; + +} // namespace __unicode + +# endif // _LIBCPP_HAS_NO_UNICODE + +#endif //_LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_UNICODE_H diff --git a/libcxx/include/format b/libcxx/include/format --- a/libcxx/include/format +++ b/libcxx/include/format @@ -146,6 +146,7 @@ #include <__format/formatter_pointer.h> #include <__format/formatter_string.h> #include <__format/parser_std_format_spec.h> +#include <__format/unicode.h> 
#include <__variant/monostate.h> #include #include diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -561,6 +561,7 @@ export optional export locale } + module extended_grapheme_cluster_table { private header "__format/extended_grapheme_cluster_table.h" } module format_error { private header "__format/format_error.h" } module format_fwd { private header "__format/format_fwd.h" } module format_parse_context { private header "__format/format_parse_context.h" } @@ -575,6 +576,7 @@ module formatter_pointer { private header "__format/formatter_pointer.h" } module formatter_string { private header "__format/formatter_string.h" } module parser_std_format_spec { private header "__format/parser_std_format_spec.h" } + module unicode { private header "__format/unicode.h" } } } module forward_list { diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -254,6 +254,7 @@ #include <__format/buffer.h> // expected-error@*:* {{use of private header from outside its module: '__format/buffer.h'}} #include <__format/concepts.h> // expected-error@*:* {{use of private header from outside its module: '__format/concepts.h'}} #include <__format/enable_insertable.h> // expected-error@*:* {{use of private header from outside its module: '__format/enable_insertable.h'}} +#include <__format/extended_grapheme_cluster_table.h> // expected-error@*:* {{use of private header from outside its module: '__format/extended_grapheme_cluster_table.h'}} #include <__format/format_arg.h> // expected-error@*:* {{use of private header from outside its module: '__format/format_arg.h'}} #include <__format/format_arg_store.h> // expected-error@*:* {{use of private header from outside its module: '__format/format_arg_store.h'}} #include 
<__format/format_args.h> // expected-error@*:* {{use of private header from outside its module: '__format/format_args.h'}} @@ -272,6 +273,7 @@ #include <__format/formatter_pointer.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_pointer.h'}} #include <__format/formatter_string.h> // expected-error@*:* {{use of private header from outside its module: '__format/formatter_string.h'}} #include <__format/parser_std_format_spec.h> // expected-error@*:* {{use of private header from outside its module: '__format/parser_std_format_spec.h'}} +#include <__format/unicode.h> // expected-error@*:* {{use of private header from outside its module: '__format/unicode.h'}} #include <__functional/binary_function.h> // expected-error@*:* {{use of private header from outside its module: '__functional/binary_function.h'}} #include <__functional/binary_negate.h> // expected-error@*:* {{use of private header from outside its module: '__functional/binary_negate.h'}} #include <__functional/bind.h> // expected-error@*:* {{use of private header from outside its module: '__functional/bind.h'}} diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h @@ -0,0 +1,1905 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_extended_grapheme_cluster_test.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use <https://www.unicode.org/copyright.html> +// for definitions of Unicode Inc.'s Data Files and Software. +// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of the Unicode data files and any associated documentation +// (the "Data Files") or Unicode software and any associated documentation +// (the "Software") to deal in the Data Files or Software +// without restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, and/or sell copies of +// the Data Files or Software, and to permit persons to whom the Data Files +// or Software are furnished to do so, provided that either +// (a) this copyright and permission notice appear with all copies +// of the Data Files or Software, or +// (b) this copyright and permission notice appear in associated +// Documentation. 
+// +// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT OF THIRD PARTY RIGHTS. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THE DATA FILES OR SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, +// use or other dealings in these Data Files or Software without prior +// written authorization of the copyright holder. + +#ifndef LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H +#define LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H + +#include <array> +#include <string_view> +#include <vector> + +template <class CharT> +struct data { + /// The input to parse. + std::basic_string_view<CharT> input; + + /// The first code point of every extended grapheme cluster in the input. + std::vector<char32_t> code_points; + + /// The offset one beyond the last code unit of every extended grapheme cluster in the input. + /// + /// The vector has the same number of entries as \ref code_points. + std::vector<size_t> breaks; +}; + +/// The data for UTF-8. 
+std::array, 602> data_utf8 = { + {{"\U00000020\U00000020", {32, 32}, {1, 2}}, + {"\U00000020\U00000308\U00000020", {32, 32}, {3, 4}}, + {"\U00000020\U0000000d", {32, 13}, {1, 2}}, + {"\U00000020\U00000308\U0000000d", {32, 13}, {3, 4}}, + {"\U00000020\U0000000a", {32, 10}, {1, 2}}, + {"\U00000020\U00000308\U0000000a", {32, 10}, {3, 4}}, + {"\U00000020\U00000001", {32, 1}, {1, 2}}, + {"\U00000020\U00000308\U00000001", {32, 1}, {3, 4}}, + {"\U00000020\U0000034f", {32}, {3}}, + {"\U00000020\U00000308\U0000034f", {32}, {5}}, + {"\U00000020\U0001f1e6", {32, 127462}, {1, 5}}, + {"\U00000020\U00000308\U0001f1e6", {32, 127462}, {3, 7}}, + {"\U00000020\U00000600", {32, 1536}, {1, 3}}, + {"\U00000020\U00000308\U00000600", {32, 1536}, {3, 5}}, + {"\U00000020\U00000903", {32}, {4}}, + {"\U00000020\U00000308\U00000903", {32}, {6}}, + {"\U00000020\U00001100", {32, 4352}, {1, 4}}, + {"\U00000020\U00000308\U00001100", {32, 4352}, {3, 6}}, + {"\U00000020\U00001160", {32, 4448}, {1, 4}}, + {"\U00000020\U00000308\U00001160", {32, 4448}, {3, 6}}, + {"\U00000020\U000011a8", {32, 4520}, {1, 4}}, + {"\U00000020\U00000308\U000011a8", {32, 4520}, {3, 6}}, + {"\U00000020\U0000ac00", {32, 44032}, {1, 4}}, + {"\U00000020\U00000308\U0000ac00", {32, 44032}, {3, 6}}, + {"\U00000020\U0000ac01", {32, 44033}, {1, 4}}, + {"\U00000020\U00000308\U0000ac01", {32, 44033}, {3, 6}}, + {"\U00000020\U0000231a", {32, 8986}, {1, 4}}, + {"\U00000020\U00000308\U0000231a", {32, 8986}, {3, 6}}, + {"\U00000020\U00000300", {32}, {3}}, + {"\U00000020\U00000308\U00000300", {32}, {5}}, + {"\U00000020\U0000200d", {32}, {4}}, + {"\U00000020\U00000308\U0000200d", {32}, {6}}, + {"\U00000020\U00000378", {32, 888}, {1, 3}}, + {"\U00000020\U00000308\U00000378", {32, 888}, {3, 5}}, + {"\U0000000d\U00000020", {13, 32}, {1, 2}}, + {"\U0000000d\U00000308\U00000020", {13, 776, 32}, {1, 3, 4}}, + {"\U0000000d\U0000000d", {13, 13}, {1, 2}}, + {"\U0000000d\U00000308\U0000000d", {13, 776, 13}, {1, 3, 4}}, + {"\U0000000d\U0000000a", 
{13}, {2}}, + {"\U0000000d\U00000308\U0000000a", {13, 776, 10}, {1, 3, 4}}, + {"\U0000000d\U00000001", {13, 1}, {1, 2}}, + {"\U0000000d\U00000308\U00000001", {13, 776, 1}, {1, 3, 4}}, + {"\U0000000d\U0000034f", {13, 847}, {1, 3}}, + {"\U0000000d\U00000308\U0000034f", {13, 776}, {1, 5}}, + {"\U0000000d\U0001f1e6", {13, 127462}, {1, 5}}, + {"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 3, 7}}, + {"\U0000000d\U00000600", {13, 1536}, {1, 3}}, + {"\U0000000d\U00000308\U00000600", {13, 776, 1536}, {1, 3, 5}}, + {"\U0000000d\U00000903", {13, 2307}, {1, 4}}, + {"\U0000000d\U00000308\U00000903", {13, 776}, {1, 6}}, + {"\U0000000d\U00001100", {13, 4352}, {1, 4}}, + {"\U0000000d\U00000308\U00001100", {13, 776, 4352}, {1, 3, 6}}, + {"\U0000000d\U00001160", {13, 4448}, {1, 4}}, + {"\U0000000d\U00000308\U00001160", {13, 776, 4448}, {1, 3, 6}}, + {"\U0000000d\U000011a8", {13, 4520}, {1, 4}}, + {"\U0000000d\U00000308\U000011a8", {13, 776, 4520}, {1, 3, 6}}, + {"\U0000000d\U0000ac00", {13, 44032}, {1, 4}}, + {"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 3, 6}}, + {"\U0000000d\U0000ac01", {13, 44033}, {1, 4}}, + {"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 3, 6}}, + {"\U0000000d\U0000231a", {13, 8986}, {1, 4}}, + {"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 3, 6}}, + {"\U0000000d\U00000300", {13, 768}, {1, 3}}, + {"\U0000000d\U00000308\U00000300", {13, 776}, {1, 5}}, + {"\U0000000d\U0000200d", {13, 8205}, {1, 4}}, + {"\U0000000d\U00000308\U0000200d", {13, 776}, {1, 6}}, + {"\U0000000d\U00000378", {13, 888}, {1, 3}}, + {"\U0000000d\U00000308\U00000378", {13, 776, 888}, {1, 3, 5}}, + {"\U0000000a\U00000020", {10, 32}, {1, 2}}, + {"\U0000000a\U00000308\U00000020", {10, 776, 32}, {1, 3, 4}}, + {"\U0000000a\U0000000d", {10, 13}, {1, 2}}, + {"\U0000000a\U00000308\U0000000d", {10, 776, 13}, {1, 3, 4}}, + {"\U0000000a\U0000000a", {10, 10}, {1, 2}}, + {"\U0000000a\U00000308\U0000000a", {10, 776, 10}, {1, 3, 4}}, + {"\U0000000a\U00000001", {10, 1}, 
{1, 2}}, + {"\U0000000a\U00000308\U00000001", {10, 776, 1}, {1, 3, 4}}, + {"\U0000000a\U0000034f", {10, 847}, {1, 3}}, + {"\U0000000a\U00000308\U0000034f", {10, 776}, {1, 5}}, + {"\U0000000a\U0001f1e6", {10, 127462}, {1, 5}}, + {"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 3, 7}}, + {"\U0000000a\U00000600", {10, 1536}, {1, 3}}, + {"\U0000000a\U00000308\U00000600", {10, 776, 1536}, {1, 3, 5}}, + {"\U0000000a\U00000903", {10, 2307}, {1, 4}}, + {"\U0000000a\U00000308\U00000903", {10, 776}, {1, 6}}, + {"\U0000000a\U00001100", {10, 4352}, {1, 4}}, + {"\U0000000a\U00000308\U00001100", {10, 776, 4352}, {1, 3, 6}}, + {"\U0000000a\U00001160", {10, 4448}, {1, 4}}, + {"\U0000000a\U00000308\U00001160", {10, 776, 4448}, {1, 3, 6}}, + {"\U0000000a\U000011a8", {10, 4520}, {1, 4}}, + {"\U0000000a\U00000308\U000011a8", {10, 776, 4520}, {1, 3, 6}}, + {"\U0000000a\U0000ac00", {10, 44032}, {1, 4}}, + {"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 3, 6}}, + {"\U0000000a\U0000ac01", {10, 44033}, {1, 4}}, + {"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 3, 6}}, + {"\U0000000a\U0000231a", {10, 8986}, {1, 4}}, + {"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 3, 6}}, + {"\U0000000a\U00000300", {10, 768}, {1, 3}}, + {"\U0000000a\U00000308\U00000300", {10, 776}, {1, 5}}, + {"\U0000000a\U0000200d", {10, 8205}, {1, 4}}, + {"\U0000000a\U00000308\U0000200d", {10, 776}, {1, 6}}, + {"\U0000000a\U00000378", {10, 888}, {1, 3}}, + {"\U0000000a\U00000308\U00000378", {10, 776, 888}, {1, 3, 5}}, + {"\U00000001\U00000020", {1, 32}, {1, 2}}, + {"\U00000001\U00000308\U00000020", {1, 776, 32}, {1, 3, 4}}, + {"\U00000001\U0000000d", {1, 13}, {1, 2}}, + {"\U00000001\U00000308\U0000000d", {1, 776, 13}, {1, 3, 4}}, + {"\U00000001\U0000000a", {1, 10}, {1, 2}}, + {"\U00000001\U00000308\U0000000a", {1, 776, 10}, {1, 3, 4}}, + {"\U00000001\U00000001", {1, 1}, {1, 2}}, + {"\U00000001\U00000308\U00000001", {1, 776, 1}, {1, 3, 4}}, + {"\U00000001\U0000034f", {1, 847}, {1, 3}}, + 
{"\U00000001\U00000308\U0000034f", {1, 776}, {1, 5}}, + {"\U00000001\U0001f1e6", {1, 127462}, {1, 5}}, + {"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 3, 7}}, + {"\U00000001\U00000600", {1, 1536}, {1, 3}}, + {"\U00000001\U00000308\U00000600", {1, 776, 1536}, {1, 3, 5}}, + {"\U00000001\U00000903", {1, 2307}, {1, 4}}, + {"\U00000001\U00000308\U00000903", {1, 776}, {1, 6}}, + {"\U00000001\U00001100", {1, 4352}, {1, 4}}, + {"\U00000001\U00000308\U00001100", {1, 776, 4352}, {1, 3, 6}}, + {"\U00000001\U00001160", {1, 4448}, {1, 4}}, + {"\U00000001\U00000308\U00001160", {1, 776, 4448}, {1, 3, 6}}, + {"\U00000001\U000011a8", {1, 4520}, {1, 4}}, + {"\U00000001\U00000308\U000011a8", {1, 776, 4520}, {1, 3, 6}}, + {"\U00000001\U0000ac00", {1, 44032}, {1, 4}}, + {"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 3, 6}}, + {"\U00000001\U0000ac01", {1, 44033}, {1, 4}}, + {"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 3, 6}}, + {"\U00000001\U0000231a", {1, 8986}, {1, 4}}, + {"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 3, 6}}, + {"\U00000001\U00000300", {1, 768}, {1, 3}}, + {"\U00000001\U00000308\U00000300", {1, 776}, {1, 5}}, + {"\U00000001\U0000200d", {1, 8205}, {1, 4}}, + {"\U00000001\U00000308\U0000200d", {1, 776}, {1, 6}}, + {"\U00000001\U00000378", {1, 888}, {1, 3}}, + {"\U00000001\U00000308\U00000378", {1, 776, 888}, {1, 3, 5}}, + {"\U0000034f\U00000020", {847, 32}, {2, 3}}, + {"\U0000034f\U00000308\U00000020", {847, 32}, {4, 5}}, + {"\U0000034f\U0000000d", {847, 13}, {2, 3}}, + {"\U0000034f\U00000308\U0000000d", {847, 13}, {4, 5}}, + {"\U0000034f\U0000000a", {847, 10}, {2, 3}}, + {"\U0000034f\U00000308\U0000000a", {847, 10}, {4, 5}}, + {"\U0000034f\U00000001", {847, 1}, {2, 3}}, + {"\U0000034f\U00000308\U00000001", {847, 1}, {4, 5}}, + {"\U0000034f\U0000034f", {847}, {4}}, + {"\U0000034f\U00000308\U0000034f", {847}, {6}}, + {"\U0000034f\U0001f1e6", {847, 127462}, {2, 6}}, + {"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {4, 8}}, + 
{"\U0000034f\U00000600", {847, 1536}, {2, 4}}, + {"\U0000034f\U00000308\U00000600", {847, 1536}, {4, 6}}, + {"\U0000034f\U00000903", {847}, {5}}, + {"\U0000034f\U00000308\U00000903", {847}, {7}}, + {"\U0000034f\U00001100", {847, 4352}, {2, 5}}, + {"\U0000034f\U00000308\U00001100", {847, 4352}, {4, 7}}, + {"\U0000034f\U00001160", {847, 4448}, {2, 5}}, + {"\U0000034f\U00000308\U00001160", {847, 4448}, {4, 7}}, + {"\U0000034f\U000011a8", {847, 4520}, {2, 5}}, + {"\U0000034f\U00000308\U000011a8", {847, 4520}, {4, 7}}, + {"\U0000034f\U0000ac00", {847, 44032}, {2, 5}}, + {"\U0000034f\U00000308\U0000ac00", {847, 44032}, {4, 7}}, + {"\U0000034f\U0000ac01", {847, 44033}, {2, 5}}, + {"\U0000034f\U00000308\U0000ac01", {847, 44033}, {4, 7}}, + {"\U0000034f\U0000231a", {847, 8986}, {2, 5}}, + {"\U0000034f\U00000308\U0000231a", {847, 8986}, {4, 7}}, + {"\U0000034f\U00000300", {847}, {4}}, + {"\U0000034f\U00000308\U00000300", {847}, {6}}, + {"\U0000034f\U0000200d", {847}, {5}}, + {"\U0000034f\U00000308\U0000200d", {847}, {7}}, + {"\U0000034f\U00000378", {847, 888}, {2, 4}}, + {"\U0000034f\U00000308\U00000378", {847, 888}, {4, 6}}, + {"\U0001f1e6\U00000020", {127462, 32}, {4, 5}}, + {"\U0001f1e6\U00000308\U00000020", {127462, 32}, {6, 7}}, + {"\U0001f1e6\U0000000d", {127462, 13}, {4, 5}}, + {"\U0001f1e6\U00000308\U0000000d", {127462, 13}, {6, 7}}, + {"\U0001f1e6\U0000000a", {127462, 10}, {4, 5}}, + {"\U0001f1e6\U00000308\U0000000a", {127462, 10}, {6, 7}}, + {"\U0001f1e6\U00000001", {127462, 1}, {4, 5}}, + {"\U0001f1e6\U00000308\U00000001", {127462, 1}, {6, 7}}, + {"\U0001f1e6\U0000034f", {127462}, {6}}, + {"\U0001f1e6\U00000308\U0000034f", {127462}, {8}}, + {"\U0001f1e6\U0001f1e6", {127462}, {8}}, + {"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {6, 10}}, + {"\U0001f1e6\U00000600", {127462, 1536}, {4, 6}}, + {"\U0001f1e6\U00000308\U00000600", {127462, 1536}, {6, 8}}, + {"\U0001f1e6\U00000903", {127462}, {7}}, + {"\U0001f1e6\U00000308\U00000903", {127462}, {9}}, + 
{"\U0001f1e6\U00001100", {127462, 4352}, {4, 7}}, + {"\U0001f1e6\U00000308\U00001100", {127462, 4352}, {6, 9}}, + {"\U0001f1e6\U00001160", {127462, 4448}, {4, 7}}, + {"\U0001f1e6\U00000308\U00001160", {127462, 4448}, {6, 9}}, + {"\U0001f1e6\U000011a8", {127462, 4520}, {4, 7}}, + {"\U0001f1e6\U00000308\U000011a8", {127462, 4520}, {6, 9}}, + {"\U0001f1e6\U0000ac00", {127462, 44032}, {4, 7}}, + {"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {6, 9}}, + {"\U0001f1e6\U0000ac01", {127462, 44033}, {4, 7}}, + {"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {6, 9}}, + {"\U0001f1e6\U0000231a", {127462, 8986}, {4, 7}}, + {"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {6, 9}}, + {"\U0001f1e6\U00000300", {127462}, {6}}, + {"\U0001f1e6\U00000308\U00000300", {127462}, {8}}, + {"\U0001f1e6\U0000200d", {127462}, {7}}, + {"\U0001f1e6\U00000308\U0000200d", {127462}, {9}}, + {"\U0001f1e6\U00000378", {127462, 888}, {4, 6}}, + {"\U0001f1e6\U00000308\U00000378", {127462, 888}, {6, 8}}, + {"\U00000600\U00000020", {1536}, {3}}, + {"\U00000600\U00000308\U00000020", {1536, 32}, {4, 5}}, + {"\U00000600\U0000000d", {1536, 13}, {2, 3}}, + {"\U00000600\U00000308\U0000000d", {1536, 13}, {4, 5}}, + {"\U00000600\U0000000a", {1536, 10}, {2, 3}}, + {"\U00000600\U00000308\U0000000a", {1536, 10}, {4, 5}}, + {"\U00000600\U00000001", {1536, 1}, {2, 3}}, + {"\U00000600\U00000308\U00000001", {1536, 1}, {4, 5}}, + {"\U00000600\U0000034f", {1536}, {4}}, + {"\U00000600\U00000308\U0000034f", {1536}, {6}}, + {"\U00000600\U0001f1e6", {1536}, {6}}, + {"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {4, 8}}, + {"\U00000600\U00000600", {1536}, {4}}, + {"\U00000600\U00000308\U00000600", {1536, 1536}, {4, 6}}, + {"\U00000600\U00000903", {1536}, {5}}, + {"\U00000600\U00000308\U00000903", {1536}, {7}}, + {"\U00000600\U00001100", {1536}, {5}}, + {"\U00000600\U00000308\U00001100", {1536, 4352}, {4, 7}}, + {"\U00000600\U00001160", {1536}, {5}}, + {"\U00000600\U00000308\U00001160", {1536, 4448}, {4, 7}}, + 
{"\U00000600\U000011a8", {1536}, {5}}, + {"\U00000600\U00000308\U000011a8", {1536, 4520}, {4, 7}}, + {"\U00000600\U0000ac00", {1536}, {5}}, + {"\U00000600\U00000308\U0000ac00", {1536, 44032}, {4, 7}}, + {"\U00000600\U0000ac01", {1536}, {5}}, + {"\U00000600\U00000308\U0000ac01", {1536, 44033}, {4, 7}}, + {"\U00000600\U0000231a", {1536}, {5}}, + {"\U00000600\U00000308\U0000231a", {1536, 8986}, {4, 7}}, + {"\U00000600\U00000300", {1536}, {4}}, + {"\U00000600\U00000308\U00000300", {1536}, {6}}, + {"\U00000600\U0000200d", {1536}, {5}}, + {"\U00000600\U00000308\U0000200d", {1536}, {7}}, + {"\U00000600\U00000378", {1536}, {4}}, + {"\U00000600\U00000308\U00000378", {1536, 888}, {4, 6}}, + {"\U00000903\U00000020", {2307, 32}, {3, 4}}, + {"\U00000903\U00000308\U00000020", {2307, 32}, {5, 6}}, + {"\U00000903\U0000000d", {2307, 13}, {3, 4}}, + {"\U00000903\U00000308\U0000000d", {2307, 13}, {5, 6}}, + {"\U00000903\U0000000a", {2307, 10}, {3, 4}}, + {"\U00000903\U00000308\U0000000a", {2307, 10}, {5, 6}}, + {"\U00000903\U00000001", {2307, 1}, {3, 4}}, + {"\U00000903\U00000308\U00000001", {2307, 1}, {5, 6}}, + {"\U00000903\U0000034f", {2307}, {5}}, + {"\U00000903\U00000308\U0000034f", {2307}, {7}}, + {"\U00000903\U0001f1e6", {2307, 127462}, {3, 7}}, + {"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {5, 9}}, + {"\U00000903\U00000600", {2307, 1536}, {3, 5}}, + {"\U00000903\U00000308\U00000600", {2307, 1536}, {5, 7}}, + {"\U00000903\U00000903", {2307}, {6}}, + {"\U00000903\U00000308\U00000903", {2307}, {8}}, + {"\U00000903\U00001100", {2307, 4352}, {3, 6}}, + {"\U00000903\U00000308\U00001100", {2307, 4352}, {5, 8}}, + {"\U00000903\U00001160", {2307, 4448}, {3, 6}}, + {"\U00000903\U00000308\U00001160", {2307, 4448}, {5, 8}}, + {"\U00000903\U000011a8", {2307, 4520}, {3, 6}}, + {"\U00000903\U00000308\U000011a8", {2307, 4520}, {5, 8}}, + {"\U00000903\U0000ac00", {2307, 44032}, {3, 6}}, + {"\U00000903\U00000308\U0000ac00", {2307, 44032}, {5, 8}}, + {"\U00000903\U0000ac01", {2307, 
44033}, {3, 6}}, + {"\U00000903\U00000308\U0000ac01", {2307, 44033}, {5, 8}}, + {"\U00000903\U0000231a", {2307, 8986}, {3, 6}}, + {"\U00000903\U00000308\U0000231a", {2307, 8986}, {5, 8}}, + {"\U00000903\U00000300", {2307}, {5}}, + {"\U00000903\U00000308\U00000300", {2307}, {7}}, + {"\U00000903\U0000200d", {2307}, {6}}, + {"\U00000903\U00000308\U0000200d", {2307}, {8}}, + {"\U00000903\U00000378", {2307, 888}, {3, 5}}, + {"\U00000903\U00000308\U00000378", {2307, 888}, {5, 7}}, + {"\U00001100\U00000020", {4352, 32}, {3, 4}}, + {"\U00001100\U00000308\U00000020", {4352, 32}, {5, 6}}, + {"\U00001100\U0000000d", {4352, 13}, {3, 4}}, + {"\U00001100\U00000308\U0000000d", {4352, 13}, {5, 6}}, + {"\U00001100\U0000000a", {4352, 10}, {3, 4}}, + {"\U00001100\U00000308\U0000000a", {4352, 10}, {5, 6}}, + {"\U00001100\U00000001", {4352, 1}, {3, 4}}, + {"\U00001100\U00000308\U00000001", {4352, 1}, {5, 6}}, + {"\U00001100\U0000034f", {4352}, {5}}, + {"\U00001100\U00000308\U0000034f", {4352}, {7}}, + {"\U00001100\U0001f1e6", {4352, 127462}, {3, 7}}, + {"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {5, 9}}, + {"\U00001100\U00000600", {4352, 1536}, {3, 5}}, + {"\U00001100\U00000308\U00000600", {4352, 1536}, {5, 7}}, + {"\U00001100\U00000903", {4352}, {6}}, + {"\U00001100\U00000308\U00000903", {4352}, {8}}, + {"\U00001100\U00001100", {4352}, {6}}, + {"\U00001100\U00000308\U00001100", {4352, 4352}, {5, 8}}, + {"\U00001100\U00001160", {4352}, {6}}, + {"\U00001100\U00000308\U00001160", {4352, 4448}, {5, 8}}, + {"\U00001100\U000011a8", {4352, 4520}, {3, 6}}, + {"\U00001100\U00000308\U000011a8", {4352, 4520}, {5, 8}}, + {"\U00001100\U0000ac00", {4352}, {6}}, + {"\U00001100\U00000308\U0000ac00", {4352, 44032}, {5, 8}}, + {"\U00001100\U0000ac01", {4352}, {6}}, + {"\U00001100\U00000308\U0000ac01", {4352, 44033}, {5, 8}}, + {"\U00001100\U0000231a", {4352, 8986}, {3, 6}}, + {"\U00001100\U00000308\U0000231a", {4352, 8986}, {5, 8}}, + {"\U00001100\U00000300", {4352}, {5}}, + 
{"\U00001100\U00000308\U00000300", {4352}, {7}}, + {"\U00001100\U0000200d", {4352}, {6}}, + {"\U00001100\U00000308\U0000200d", {4352}, {8}}, + {"\U00001100\U00000378", {4352, 888}, {3, 5}}, + {"\U00001100\U00000308\U00000378", {4352, 888}, {5, 7}}, + {"\U00001160\U00000020", {4448, 32}, {3, 4}}, + {"\U00001160\U00000308\U00000020", {4448, 32}, {5, 6}}, + {"\U00001160\U0000000d", {4448, 13}, {3, 4}}, + {"\U00001160\U00000308\U0000000d", {4448, 13}, {5, 6}}, + {"\U00001160\U0000000a", {4448, 10}, {3, 4}}, + {"\U00001160\U00000308\U0000000a", {4448, 10}, {5, 6}}, + {"\U00001160\U00000001", {4448, 1}, {3, 4}}, + {"\U00001160\U00000308\U00000001", {4448, 1}, {5, 6}}, + {"\U00001160\U0000034f", {4448}, {5}}, + {"\U00001160\U00000308\U0000034f", {4448}, {7}}, + {"\U00001160\U0001f1e6", {4448, 127462}, {3, 7}}, + {"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {5, 9}}, + {"\U00001160\U00000600", {4448, 1536}, {3, 5}}, + {"\U00001160\U00000308\U00000600", {4448, 1536}, {5, 7}}, + {"\U00001160\U00000903", {4448}, {6}}, + {"\U00001160\U00000308\U00000903", {4448}, {8}}, + {"\U00001160\U00001100", {4448, 4352}, {3, 6}}, + {"\U00001160\U00000308\U00001100", {4448, 4352}, {5, 8}}, + {"\U00001160\U00001160", {4448}, {6}}, + {"\U00001160\U00000308\U00001160", {4448, 4448}, {5, 8}}, + {"\U00001160\U000011a8", {4448}, {6}}, + {"\U00001160\U00000308\U000011a8", {4448, 4520}, {5, 8}}, + {"\U00001160\U0000ac00", {4448, 44032}, {3, 6}}, + {"\U00001160\U00000308\U0000ac00", {4448, 44032}, {5, 8}}, + {"\U00001160\U0000ac01", {4448, 44033}, {3, 6}}, + {"\U00001160\U00000308\U0000ac01", {4448, 44033}, {5, 8}}, + {"\U00001160\U0000231a", {4448, 8986}, {3, 6}}, + {"\U00001160\U00000308\U0000231a", {4448, 8986}, {5, 8}}, + {"\U00001160\U00000300", {4448}, {5}}, + {"\U00001160\U00000308\U00000300", {4448}, {7}}, + {"\U00001160\U0000200d", {4448}, {6}}, + {"\U00001160\U00000308\U0000200d", {4448}, {8}}, + {"\U00001160\U00000378", {4448, 888}, {3, 5}}, + {"\U00001160\U00000308\U00000378", 
{4448, 888}, {5, 7}}, + {"\U000011a8\U00000020", {4520, 32}, {3, 4}}, + {"\U000011a8\U00000308\U00000020", {4520, 32}, {5, 6}}, + {"\U000011a8\U0000000d", {4520, 13}, {3, 4}}, + {"\U000011a8\U00000308\U0000000d", {4520, 13}, {5, 6}}, + {"\U000011a8\U0000000a", {4520, 10}, {3, 4}}, + {"\U000011a8\U00000308\U0000000a", {4520, 10}, {5, 6}}, + {"\U000011a8\U00000001", {4520, 1}, {3, 4}}, + {"\U000011a8\U00000308\U00000001", {4520, 1}, {5, 6}}, + {"\U000011a8\U0000034f", {4520}, {5}}, + {"\U000011a8\U00000308\U0000034f", {4520}, {7}}, + {"\U000011a8\U0001f1e6", {4520, 127462}, {3, 7}}, + {"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {5, 9}}, + {"\U000011a8\U00000600", {4520, 1536}, {3, 5}}, + {"\U000011a8\U00000308\U00000600", {4520, 1536}, {5, 7}}, + {"\U000011a8\U00000903", {4520}, {6}}, + {"\U000011a8\U00000308\U00000903", {4520}, {8}}, + {"\U000011a8\U00001100", {4520, 4352}, {3, 6}}, + {"\U000011a8\U00000308\U00001100", {4520, 4352}, {5, 8}}, + {"\U000011a8\U00001160", {4520, 4448}, {3, 6}}, + {"\U000011a8\U00000308\U00001160", {4520, 4448}, {5, 8}}, + {"\U000011a8\U000011a8", {4520}, {6}}, + {"\U000011a8\U00000308\U000011a8", {4520, 4520}, {5, 8}}, + {"\U000011a8\U0000ac00", {4520, 44032}, {3, 6}}, + {"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {5, 8}}, + {"\U000011a8\U0000ac01", {4520, 44033}, {3, 6}}, + {"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {5, 8}}, + {"\U000011a8\U0000231a", {4520, 8986}, {3, 6}}, + {"\U000011a8\U00000308\U0000231a", {4520, 8986}, {5, 8}}, + {"\U000011a8\U00000300", {4520}, {5}}, + {"\U000011a8\U00000308\U00000300", {4520}, {7}}, + {"\U000011a8\U0000200d", {4520}, {6}}, + {"\U000011a8\U00000308\U0000200d", {4520}, {8}}, + {"\U000011a8\U00000378", {4520, 888}, {3, 5}}, + {"\U000011a8\U00000308\U00000378", {4520, 888}, {5, 7}}, + {"\U0000ac00\U00000020", {44032, 32}, {3, 4}}, + {"\U0000ac00\U00000308\U00000020", {44032, 32}, {5, 6}}, + {"\U0000ac00\U0000000d", {44032, 13}, {3, 4}}, + {"\U0000ac00\U00000308\U0000000d", 
{44032, 13}, {5, 6}}, + {"\U0000ac00\U0000000a", {44032, 10}, {3, 4}}, + {"\U0000ac00\U00000308\U0000000a", {44032, 10}, {5, 6}}, + {"\U0000ac00\U00000001", {44032, 1}, {3, 4}}, + {"\U0000ac00\U00000308\U00000001", {44032, 1}, {5, 6}}, + {"\U0000ac00\U0000034f", {44032}, {5}}, + {"\U0000ac00\U00000308\U0000034f", {44032}, {7}}, + {"\U0000ac00\U0001f1e6", {44032, 127462}, {3, 7}}, + {"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {5, 9}}, + {"\U0000ac00\U00000600", {44032, 1536}, {3, 5}}, + {"\U0000ac00\U00000308\U00000600", {44032, 1536}, {5, 7}}, + {"\U0000ac00\U00000903", {44032}, {6}}, + {"\U0000ac00\U00000308\U00000903", {44032}, {8}}, + {"\U0000ac00\U00001100", {44032, 4352}, {3, 6}}, + {"\U0000ac00\U00000308\U00001100", {44032, 4352}, {5, 8}}, + {"\U0000ac00\U00001160", {44032}, {6}}, + {"\U0000ac00\U00000308\U00001160", {44032, 4448}, {5, 8}}, + {"\U0000ac00\U000011a8", {44032}, {6}}, + {"\U0000ac00\U00000308\U000011a8", {44032, 4520}, {5, 8}}, + {"\U0000ac00\U0000ac00", {44032, 44032}, {3, 6}}, + {"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {5, 8}}, + {"\U0000ac00\U0000ac01", {44032, 44033}, {3, 6}}, + {"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {5, 8}}, + {"\U0000ac00\U0000231a", {44032, 8986}, {3, 6}}, + {"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {5, 8}}, + {"\U0000ac00\U00000300", {44032}, {5}}, + {"\U0000ac00\U00000308\U00000300", {44032}, {7}}, + {"\U0000ac00\U0000200d", {44032}, {6}}, + {"\U0000ac00\U00000308\U0000200d", {44032}, {8}}, + {"\U0000ac00\U00000378", {44032, 888}, {3, 5}}, + {"\U0000ac00\U00000308\U00000378", {44032, 888}, {5, 7}}, + {"\U0000ac01\U00000020", {44033, 32}, {3, 4}}, + {"\U0000ac01\U00000308\U00000020", {44033, 32}, {5, 6}}, + {"\U0000ac01\U0000000d", {44033, 13}, {3, 4}}, + {"\U0000ac01\U00000308\U0000000d", {44033, 13}, {5, 6}}, + {"\U0000ac01\U0000000a", {44033, 10}, {3, 4}}, + {"\U0000ac01\U00000308\U0000000a", {44033, 10}, {5, 6}}, + {"\U0000ac01\U00000001", {44033, 1}, {3, 4}}, + 
{"\U0000ac01\U00000308\U00000001", {44033, 1}, {5, 6}}, + {"\U0000ac01\U0000034f", {44033}, {5}}, + {"\U0000ac01\U00000308\U0000034f", {44033}, {7}}, + {"\U0000ac01\U0001f1e6", {44033, 127462}, {3, 7}}, + {"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {5, 9}}, + {"\U0000ac01\U00000600", {44033, 1536}, {3, 5}}, + {"\U0000ac01\U00000308\U00000600", {44033, 1536}, {5, 7}}, + {"\U0000ac01\U00000903", {44033}, {6}}, + {"\U0000ac01\U00000308\U00000903", {44033}, {8}}, + {"\U0000ac01\U00001100", {44033, 4352}, {3, 6}}, + {"\U0000ac01\U00000308\U00001100", {44033, 4352}, {5, 8}}, + {"\U0000ac01\U00001160", {44033, 4448}, {3, 6}}, + {"\U0000ac01\U00000308\U00001160", {44033, 4448}, {5, 8}}, + {"\U0000ac01\U000011a8", {44033}, {6}}, + {"\U0000ac01\U00000308\U000011a8", {44033, 4520}, {5, 8}}, + {"\U0000ac01\U0000ac00", {44033, 44032}, {3, 6}}, + {"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {5, 8}}, + {"\U0000ac01\U0000ac01", {44033, 44033}, {3, 6}}, + {"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {5, 8}}, + {"\U0000ac01\U0000231a", {44033, 8986}, {3, 6}}, + {"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {5, 8}}, + {"\U0000ac01\U00000300", {44033}, {5}}, + {"\U0000ac01\U00000308\U00000300", {44033}, {7}}, + {"\U0000ac01\U0000200d", {44033}, {6}}, + {"\U0000ac01\U00000308\U0000200d", {44033}, {8}}, + {"\U0000ac01\U00000378", {44033, 888}, {3, 5}}, + {"\U0000ac01\U00000308\U00000378", {44033, 888}, {5, 7}}, + {"\U0000231a\U00000020", {8986, 32}, {3, 4}}, + {"\U0000231a\U00000308\U00000020", {8986, 32}, {5, 6}}, + {"\U0000231a\U0000000d", {8986, 13}, {3, 4}}, + {"\U0000231a\U00000308\U0000000d", {8986, 13}, {5, 6}}, + {"\U0000231a\U0000000a", {8986, 10}, {3, 4}}, + {"\U0000231a\U00000308\U0000000a", {8986, 10}, {5, 6}}, + {"\U0000231a\U00000001", {8986, 1}, {3, 4}}, + {"\U0000231a\U00000308\U00000001", {8986, 1}, {5, 6}}, + {"\U0000231a\U0000034f", {8986}, {5}}, + {"\U0000231a\U00000308\U0000034f", {8986}, {7}}, + {"\U0000231a\U0001f1e6", {8986, 127462}, {3, 
7}}, + {"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {5, 9}}, + {"\U0000231a\U00000600", {8986, 1536}, {3, 5}}, + {"\U0000231a\U00000308\U00000600", {8986, 1536}, {5, 7}}, + {"\U0000231a\U00000903", {8986}, {6}}, + {"\U0000231a\U00000308\U00000903", {8986}, {8}}, + {"\U0000231a\U00001100", {8986, 4352}, {3, 6}}, + {"\U0000231a\U00000308\U00001100", {8986, 4352}, {5, 8}}, + {"\U0000231a\U00001160", {8986, 4448}, {3, 6}}, + {"\U0000231a\U00000308\U00001160", {8986, 4448}, {5, 8}}, + {"\U0000231a\U000011a8", {8986, 4520}, {3, 6}}, + {"\U0000231a\U00000308\U000011a8", {8986, 4520}, {5, 8}}, + {"\U0000231a\U0000ac00", {8986, 44032}, {3, 6}}, + {"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {5, 8}}, + {"\U0000231a\U0000ac01", {8986, 44033}, {3, 6}}, + {"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {5, 8}}, + {"\U0000231a\U0000231a", {8986, 8986}, {3, 6}}, + {"\U0000231a\U00000308\U0000231a", {8986, 8986}, {5, 8}}, + {"\U0000231a\U00000300", {8986}, {5}}, + {"\U0000231a\U00000308\U00000300", {8986}, {7}}, + {"\U0000231a\U0000200d", {8986}, {6}}, + {"\U0000231a\U00000308\U0000200d", {8986}, {8}}, + {"\U0000231a\U00000378", {8986, 888}, {3, 5}}, + {"\U0000231a\U00000308\U00000378", {8986, 888}, {5, 7}}, + {"\U00000300\U00000020", {768, 32}, {2, 3}}, + {"\U00000300\U00000308\U00000020", {768, 32}, {4, 5}}, + {"\U00000300\U0000000d", {768, 13}, {2, 3}}, + {"\U00000300\U00000308\U0000000d", {768, 13}, {4, 5}}, + {"\U00000300\U0000000a", {768, 10}, {2, 3}}, + {"\U00000300\U00000308\U0000000a", {768, 10}, {4, 5}}, + {"\U00000300\U00000001", {768, 1}, {2, 3}}, + {"\U00000300\U00000308\U00000001", {768, 1}, {4, 5}}, + {"\U00000300\U0000034f", {768}, {4}}, + {"\U00000300\U00000308\U0000034f", {768}, {6}}, + {"\U00000300\U0001f1e6", {768, 127462}, {2, 6}}, + {"\U00000300\U00000308\U0001f1e6", {768, 127462}, {4, 8}}, + {"\U00000300\U00000600", {768, 1536}, {2, 4}}, + {"\U00000300\U00000308\U00000600", {768, 1536}, {4, 6}}, + {"\U00000300\U00000903", {768}, {5}}, + 
{"\U00000300\U00000308\U00000903", {768}, {7}}, + {"\U00000300\U00001100", {768, 4352}, {2, 5}}, + {"\U00000300\U00000308\U00001100", {768, 4352}, {4, 7}}, + {"\U00000300\U00001160", {768, 4448}, {2, 5}}, + {"\U00000300\U00000308\U00001160", {768, 4448}, {4, 7}}, + {"\U00000300\U000011a8", {768, 4520}, {2, 5}}, + {"\U00000300\U00000308\U000011a8", {768, 4520}, {4, 7}}, + {"\U00000300\U0000ac00", {768, 44032}, {2, 5}}, + {"\U00000300\U00000308\U0000ac00", {768, 44032}, {4, 7}}, + {"\U00000300\U0000ac01", {768, 44033}, {2, 5}}, + {"\U00000300\U00000308\U0000ac01", {768, 44033}, {4, 7}}, + {"\U00000300\U0000231a", {768, 8986}, {2, 5}}, + {"\U00000300\U00000308\U0000231a", {768, 8986}, {4, 7}}, + {"\U00000300\U00000300", {768}, {4}}, + {"\U00000300\U00000308\U00000300", {768}, {6}}, + {"\U00000300\U0000200d", {768}, {5}}, + {"\U00000300\U00000308\U0000200d", {768}, {7}}, + {"\U00000300\U00000378", {768, 888}, {2, 4}}, + {"\U00000300\U00000308\U00000378", {768, 888}, {4, 6}}, + {"\U0000200d\U00000020", {8205, 32}, {3, 4}}, + {"\U0000200d\U00000308\U00000020", {8205, 32}, {5, 6}}, + {"\U0000200d\U0000000d", {8205, 13}, {3, 4}}, + {"\U0000200d\U00000308\U0000000d", {8205, 13}, {5, 6}}, + {"\U0000200d\U0000000a", {8205, 10}, {3, 4}}, + {"\U0000200d\U00000308\U0000000a", {8205, 10}, {5, 6}}, + {"\U0000200d\U00000001", {8205, 1}, {3, 4}}, + {"\U0000200d\U00000308\U00000001", {8205, 1}, {5, 6}}, + {"\U0000200d\U0000034f", {8205}, {5}}, + {"\U0000200d\U00000308\U0000034f", {8205}, {7}}, + {"\U0000200d\U0001f1e6", {8205, 127462}, {3, 7}}, + {"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {5, 9}}, + {"\U0000200d\U00000600", {8205, 1536}, {3, 5}}, + {"\U0000200d\U00000308\U00000600", {8205, 1536}, {5, 7}}, + {"\U0000200d\U00000903", {8205}, {6}}, + {"\U0000200d\U00000308\U00000903", {8205}, {8}}, + {"\U0000200d\U00001100", {8205, 4352}, {3, 6}}, + {"\U0000200d\U00000308\U00001100", {8205, 4352}, {5, 8}}, + {"\U0000200d\U00001160", {8205, 4448}, {3, 6}}, + 
{"\U0000200d\U00000308\U00001160", {8205, 4448}, {5, 8}}, + {"\U0000200d\U000011a8", {8205, 4520}, {3, 6}}, + {"\U0000200d\U00000308\U000011a8", {8205, 4520}, {5, 8}}, + {"\U0000200d\U0000ac00", {8205, 44032}, {3, 6}}, + {"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {5, 8}}, + {"\U0000200d\U0000ac01", {8205, 44033}, {3, 6}}, + {"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {5, 8}}, + {"\U0000200d\U0000231a", {8205, 8986}, {3, 6}}, + {"\U0000200d\U00000308\U0000231a", {8205, 8986}, {5, 8}}, + {"\U0000200d\U00000300", {8205}, {5}}, + {"\U0000200d\U00000308\U00000300", {8205}, {7}}, + {"\U0000200d\U0000200d", {8205}, {6}}, + {"\U0000200d\U00000308\U0000200d", {8205}, {8}}, + {"\U0000200d\U00000378", {8205, 888}, {3, 5}}, + {"\U0000200d\U00000308\U00000378", {8205, 888}, {5, 7}}, + {"\U00000378\U00000020", {888, 32}, {2, 3}}, + {"\U00000378\U00000308\U00000020", {888, 32}, {4, 5}}, + {"\U00000378\U0000000d", {888, 13}, {2, 3}}, + {"\U00000378\U00000308\U0000000d", {888, 13}, {4, 5}}, + {"\U00000378\U0000000a", {888, 10}, {2, 3}}, + {"\U00000378\U00000308\U0000000a", {888, 10}, {4, 5}}, + {"\U00000378\U00000001", {888, 1}, {2, 3}}, + {"\U00000378\U00000308\U00000001", {888, 1}, {4, 5}}, + {"\U00000378\U0000034f", {888}, {4}}, + {"\U00000378\U00000308\U0000034f", {888}, {6}}, + {"\U00000378\U0001f1e6", {888, 127462}, {2, 6}}, + {"\U00000378\U00000308\U0001f1e6", {888, 127462}, {4, 8}}, + {"\U00000378\U00000600", {888, 1536}, {2, 4}}, + {"\U00000378\U00000308\U00000600", {888, 1536}, {4, 6}}, + {"\U00000378\U00000903", {888}, {5}}, + {"\U00000378\U00000308\U00000903", {888}, {7}}, + {"\U00000378\U00001100", {888, 4352}, {2, 5}}, + {"\U00000378\U00000308\U00001100", {888, 4352}, {4, 7}}, + {"\U00000378\U00001160", {888, 4448}, {2, 5}}, + {"\U00000378\U00000308\U00001160", {888, 4448}, {4, 7}}, + {"\U00000378\U000011a8", {888, 4520}, {2, 5}}, + {"\U00000378\U00000308\U000011a8", {888, 4520}, {4, 7}}, + {"\U00000378\U0000ac00", {888, 44032}, {2, 5}}, + 
{"\U00000378\U00000308\U0000ac00", {888, 44032}, {4, 7}}, + {"\U00000378\U0000ac01", {888, 44033}, {2, 5}}, + {"\U00000378\U00000308\U0000ac01", {888, 44033}, {4, 7}}, + {"\U00000378\U0000231a", {888, 8986}, {2, 5}}, + {"\U00000378\U00000308\U0000231a", {888, 8986}, {4, 7}}, + {"\U00000378\U00000300", {888}, {4}}, + {"\U00000378\U00000308\U00000300", {888}, {6}}, + {"\U00000378\U0000200d", {888}, {5}}, + {"\U00000378\U00000308\U0000200d", {888}, {7}}, + {"\U00000378\U00000378", {888, 888}, {2, 4}}, + {"\U00000378\U00000308\U00000378", {888, 888}, {4, 6}}, + {"\U0000000d\U0000000a\U00000061\U0000000a\U00000308", {13, 97, 10, 776}, {2, 3, 4, 6}}, + {"\U00000061\U00000308", {97}, {3}}, + {"\U00000020\U0000200d\U00000646", {32, 1606}, {4, 6}}, + {"\U00000646\U0000200d\U00000020", {1606, 32}, {5, 6}}, + {"\U00001100\U00001100", {4352}, {6}}, + {"\U0000ac00\U000011a8\U00001100", {44032, 4352}, {6, 9}}, + {"\U0000ac01\U000011a8\U00001100", {44033, 4352}, {6, 9}}, + {"\U0001f1e6\U0001f1e7\U0001f1e8\U00000062", {127462, 127464, 98}, {8, 12, 13}}, + {"\U00000061\U0001f1e6\U0001f1e7\U0001f1e8\U00000062", {97, 127462, 127464, 98}, {1, 9, 13, 14}}, + {"\U00000061\U0001f1e6\U0001f1e7\U0000200d\U0001f1e8\U00000062", {97, 127462, 127464, 98}, {1, 12, 16, 17}}, + {"\U00000061\U0001f1e6\U0000200d\U0001f1e7\U0001f1e8\U00000062", {97, 127462, 127463, 98}, {1, 8, 16, 17}}, + {"\U00000061\U0001f1e6\U0001f1e7\U0001f1e8\U0001f1e9\U00000062", {97, 127462, 127464, 98}, {1, 9, 17, 18}}, + {"\U00000061\U0000200d", {97}, {4}}, + {"\U00000061\U00000308\U00000062", {97, 98}, {3, 4}}, + {"\U00000061\U00000903\U00000062", {97, 98}, {4, 5}}, + {"\U00000061\U00000600\U00000062", {97, 1536}, {1, 4}}, + {"\U0001f476\U0001f3ff\U0001f476", {128118, 128118}, {8, 12}}, + {"\U00000061\U0001f3ff\U0001f476", {97, 128118}, {5, 9}}, + {"\U00000061\U0001f3ff\U0001f476\U0000200d\U0001f6d1", {97, 128118}, {5, 16}}, + {"\U0001f476\U0001f3ff\U00000308\U0000200d\U0001f476\U0001f3ff", {128118}, {21}}, + 
{"\U0001f6d1\U0000200d\U0001f6d1", {128721}, {11}}, + {"\U00000061\U0000200d\U0001f6d1", {97, 128721}, {4, 8}}, + {"\U00002701\U0000200d\U00002701", {9985}, {9}}, + {"\U00000061\U0000200d\U00002701", {97, 9985}, {4, 7}}}}; + +/// The data for UTF-16. +/// +/// Note that most of the data for the UTF-16 and UTF-32 are identical. However +/// since the size of the code units differ the breaks can contain different +/// values. +std::array, 602> data_utf16 = { + {{L"\U00000020\U00000020", {32, 32}, {1, 2}}, + {L"\U00000020\U00000308\U00000020", {32, 32}, {2, 3}}, + {L"\U00000020\U0000000d", {32, 13}, {1, 2}}, + {L"\U00000020\U00000308\U0000000d", {32, 13}, {2, 3}}, + {L"\U00000020\U0000000a", {32, 10}, {1, 2}}, + {L"\U00000020\U00000308\U0000000a", {32, 10}, {2, 3}}, + {L"\U00000020\U00000001", {32, 1}, {1, 2}}, + {L"\U00000020\U00000308\U00000001", {32, 1}, {2, 3}}, + {L"\U00000020\U0000034f", {32}, {2}}, + {L"\U00000020\U00000308\U0000034f", {32}, {3}}, + {L"\U00000020\U0001f1e6", {32, 127462}, {1, 3}}, + {L"\U00000020\U00000308\U0001f1e6", {32, 127462}, {2, 4}}, + {L"\U00000020\U00000600", {32, 1536}, {1, 2}}, + {L"\U00000020\U00000308\U00000600", {32, 1536}, {2, 3}}, + {L"\U00000020\U00000903", {32}, {2}}, + {L"\U00000020\U00000308\U00000903", {32}, {3}}, + {L"\U00000020\U00001100", {32, 4352}, {1, 2}}, + {L"\U00000020\U00000308\U00001100", {32, 4352}, {2, 3}}, + {L"\U00000020\U00001160", {32, 4448}, {1, 2}}, + {L"\U00000020\U00000308\U00001160", {32, 4448}, {2, 3}}, + {L"\U00000020\U000011a8", {32, 4520}, {1, 2}}, + {L"\U00000020\U00000308\U000011a8", {32, 4520}, {2, 3}}, + {L"\U00000020\U0000ac00", {32, 44032}, {1, 2}}, + {L"\U00000020\U00000308\U0000ac00", {32, 44032}, {2, 3}}, + {L"\U00000020\U0000ac01", {32, 44033}, {1, 2}}, + {L"\U00000020\U00000308\U0000ac01", {32, 44033}, {2, 3}}, + {L"\U00000020\U0000231a", {32, 8986}, {1, 2}}, + {L"\U00000020\U00000308\U0000231a", {32, 8986}, {2, 3}}, + {L"\U00000020\U00000300", {32}, {2}}, + 
{L"\U00000020\U00000308\U00000300", {32}, {3}}, + {L"\U00000020\U0000200d", {32}, {2}}, + {L"\U00000020\U00000308\U0000200d", {32}, {3}}, + {L"\U00000020\U00000378", {32, 888}, {1, 2}}, + {L"\U00000020\U00000308\U00000378", {32, 888}, {2, 3}}, + {L"\U0000000d\U00000020", {13, 32}, {1, 2}}, + {L"\U0000000d\U00000308\U00000020", {13, 776, 32}, {1, 2, 3}}, + {L"\U0000000d\U0000000d", {13, 13}, {1, 2}}, + {L"\U0000000d\U00000308\U0000000d", {13, 776, 13}, {1, 2, 3}}, + {L"\U0000000d\U0000000a", {13}, {2}}, + {L"\U0000000d\U00000308\U0000000a", {13, 776, 10}, {1, 2, 3}}, + {L"\U0000000d\U00000001", {13, 1}, {1, 2}}, + {L"\U0000000d\U00000308\U00000001", {13, 776, 1}, {1, 2, 3}}, + {L"\U0000000d\U0000034f", {13, 847}, {1, 2}}, + {L"\U0000000d\U00000308\U0000034f", {13, 776}, {1, 3}}, + {L"\U0000000d\U0001f1e6", {13, 127462}, {1, 3}}, + {L"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 2, 4}}, + {L"\U0000000d\U00000600", {13, 1536}, {1, 2}}, + {L"\U0000000d\U00000308\U00000600", {13, 776, 1536}, {1, 2, 3}}, + {L"\U0000000d\U00000903", {13, 2307}, {1, 2}}, + {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}}, + {L"\U0000000d\U00001100", {13, 4352}, {1, 2}}, + {L"\U0000000d\U00000308\U00001100", {13, 776, 4352}, {1, 2, 3}}, + {L"\U0000000d\U00001160", {13, 4448}, {1, 2}}, + {L"\U0000000d\U00000308\U00001160", {13, 776, 4448}, {1, 2, 3}}, + {L"\U0000000d\U000011a8", {13, 4520}, {1, 2}}, + {L"\U0000000d\U00000308\U000011a8", {13, 776, 4520}, {1, 2, 3}}, + {L"\U0000000d\U0000ac00", {13, 44032}, {1, 2}}, + {L"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 2, 3}}, + {L"\U0000000d\U0000ac01", {13, 44033}, {1, 2}}, + {L"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 2, 3}}, + {L"\U0000000d\U0000231a", {13, 8986}, {1, 2}}, + {L"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 2, 3}}, + {L"\U0000000d\U00000300", {13, 768}, {1, 2}}, + {L"\U0000000d\U00000308\U00000300", {13, 776}, {1, 3}}, + {L"\U0000000d\U0000200d", {13, 8205}, {1, 2}}, + 
{L"\U0000000d\U00000308\U0000200d", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000378", {13, 888}, {1, 2}}, + {L"\U0000000d\U00000308\U00000378", {13, 776, 888}, {1, 2, 3}}, + {L"\U0000000a\U00000020", {10, 32}, {1, 2}}, + {L"\U0000000a\U00000308\U00000020", {10, 776, 32}, {1, 2, 3}}, + {L"\U0000000a\U0000000d", {10, 13}, {1, 2}}, + {L"\U0000000a\U00000308\U0000000d", {10, 776, 13}, {1, 2, 3}}, + {L"\U0000000a\U0000000a", {10, 10}, {1, 2}}, + {L"\U0000000a\U00000308\U0000000a", {10, 776, 10}, {1, 2, 3}}, + {L"\U0000000a\U00000001", {10, 1}, {1, 2}}, + {L"\U0000000a\U00000308\U00000001", {10, 776, 1}, {1, 2, 3}}, + {L"\U0000000a\U0000034f", {10, 847}, {1, 2}}, + {L"\U0000000a\U00000308\U0000034f", {10, 776}, {1, 3}}, + {L"\U0000000a\U0001f1e6", {10, 127462}, {1, 3}}, + {L"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 2, 4}}, + {L"\U0000000a\U00000600", {10, 1536}, {1, 2}}, + {L"\U0000000a\U00000308\U00000600", {10, 776, 1536}, {1, 2, 3}}, + {L"\U0000000a\U00000903", {10, 2307}, {1, 2}}, + {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}}, + {L"\U0000000a\U00001100", {10, 4352}, {1, 2}}, + {L"\U0000000a\U00000308\U00001100", {10, 776, 4352}, {1, 2, 3}}, + {L"\U0000000a\U00001160", {10, 4448}, {1, 2}}, + {L"\U0000000a\U00000308\U00001160", {10, 776, 4448}, {1, 2, 3}}, + {L"\U0000000a\U000011a8", {10, 4520}, {1, 2}}, + {L"\U0000000a\U00000308\U000011a8", {10, 776, 4520}, {1, 2, 3}}, + {L"\U0000000a\U0000ac00", {10, 44032}, {1, 2}}, + {L"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 2, 3}}, + {L"\U0000000a\U0000ac01", {10, 44033}, {1, 2}}, + {L"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 2, 3}}, + {L"\U0000000a\U0000231a", {10, 8986}, {1, 2}}, + {L"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 2, 3}}, + {L"\U0000000a\U00000300", {10, 768}, {1, 2}}, + {L"\U0000000a\U00000308\U00000300", {10, 776}, {1, 3}}, + {L"\U0000000a\U0000200d", {10, 8205}, {1, 2}}, + {L"\U0000000a\U00000308\U0000200d", {10, 776}, {1, 3}}, + 
{L"\U0000000a\U00000378", {10, 888}, {1, 2}}, + {L"\U0000000a\U00000308\U00000378", {10, 776, 888}, {1, 2, 3}}, + {L"\U00000001\U00000020", {1, 32}, {1, 2}}, + {L"\U00000001\U00000308\U00000020", {1, 776, 32}, {1, 2, 3}}, + {L"\U00000001\U0000000d", {1, 13}, {1, 2}}, + {L"\U00000001\U00000308\U0000000d", {1, 776, 13}, {1, 2, 3}}, + {L"\U00000001\U0000000a", {1, 10}, {1, 2}}, + {L"\U00000001\U00000308\U0000000a", {1, 776, 10}, {1, 2, 3}}, + {L"\U00000001\U00000001", {1, 1}, {1, 2}}, + {L"\U00000001\U00000308\U00000001", {1, 776, 1}, {1, 2, 3}}, + {L"\U00000001\U0000034f", {1, 847}, {1, 2}}, + {L"\U00000001\U00000308\U0000034f", {1, 776}, {1, 3}}, + {L"\U00000001\U0001f1e6", {1, 127462}, {1, 3}}, + {L"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 2, 4}}, + {L"\U00000001\U00000600", {1, 1536}, {1, 2}}, + {L"\U00000001\U00000308\U00000600", {1, 776, 1536}, {1, 2, 3}}, + {L"\U00000001\U00000903", {1, 2307}, {1, 2}}, + {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}}, + {L"\U00000001\U00001100", {1, 4352}, {1, 2}}, + {L"\U00000001\U00000308\U00001100", {1, 776, 4352}, {1, 2, 3}}, + {L"\U00000001\U00001160", {1, 4448}, {1, 2}}, + {L"\U00000001\U00000308\U00001160", {1, 776, 4448}, {1, 2, 3}}, + {L"\U00000001\U000011a8", {1, 4520}, {1, 2}}, + {L"\U00000001\U00000308\U000011a8", {1, 776, 4520}, {1, 2, 3}}, + {L"\U00000001\U0000ac00", {1, 44032}, {1, 2}}, + {L"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 2, 3}}, + {L"\U00000001\U0000ac01", {1, 44033}, {1, 2}}, + {L"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 2, 3}}, + {L"\U00000001\U0000231a", {1, 8986}, {1, 2}}, + {L"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 2, 3}}, + {L"\U00000001\U00000300", {1, 768}, {1, 2}}, + {L"\U00000001\U00000308\U00000300", {1, 776}, {1, 3}}, + {L"\U00000001\U0000200d", {1, 8205}, {1, 2}}, + {L"\U00000001\U00000308\U0000200d", {1, 776}, {1, 3}}, + {L"\U00000001\U00000378", {1, 888}, {1, 2}}, + {L"\U00000001\U00000308\U00000378", {1, 776, 888}, {1, 2, 3}}, 
+ {L"\U0000034f\U00000020", {847, 32}, {1, 2}}, + {L"\U0000034f\U00000308\U00000020", {847, 32}, {2, 3}}, + {L"\U0000034f\U0000000d", {847, 13}, {1, 2}}, + {L"\U0000034f\U00000308\U0000000d", {847, 13}, {2, 3}}, + {L"\U0000034f\U0000000a", {847, 10}, {1, 2}}, + {L"\U0000034f\U00000308\U0000000a", {847, 10}, {2, 3}}, + {L"\U0000034f\U00000001", {847, 1}, {1, 2}}, + {L"\U0000034f\U00000308\U00000001", {847, 1}, {2, 3}}, + {L"\U0000034f\U0000034f", {847}, {2}}, + {L"\U0000034f\U00000308\U0000034f", {847}, {3}}, + {L"\U0000034f\U0001f1e6", {847, 127462}, {1, 3}}, + {L"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {2, 4}}, + {L"\U0000034f\U00000600", {847, 1536}, {1, 2}}, + {L"\U0000034f\U00000308\U00000600", {847, 1536}, {2, 3}}, + {L"\U0000034f\U00000903", {847}, {2}}, + {L"\U0000034f\U00000308\U00000903", {847}, {3}}, + {L"\U0000034f\U00001100", {847, 4352}, {1, 2}}, + {L"\U0000034f\U00000308\U00001100", {847, 4352}, {2, 3}}, + {L"\U0000034f\U00001160", {847, 4448}, {1, 2}}, + {L"\U0000034f\U00000308\U00001160", {847, 4448}, {2, 3}}, + {L"\U0000034f\U000011a8", {847, 4520}, {1, 2}}, + {L"\U0000034f\U00000308\U000011a8", {847, 4520}, {2, 3}}, + {L"\U0000034f\U0000ac00", {847, 44032}, {1, 2}}, + {L"\U0000034f\U00000308\U0000ac00", {847, 44032}, {2, 3}}, + {L"\U0000034f\U0000ac01", {847, 44033}, {1, 2}}, + {L"\U0000034f\U00000308\U0000ac01", {847, 44033}, {2, 3}}, + {L"\U0000034f\U0000231a", {847, 8986}, {1, 2}}, + {L"\U0000034f\U00000308\U0000231a", {847, 8986}, {2, 3}}, + {L"\U0000034f\U00000300", {847}, {2}}, + {L"\U0000034f\U00000308\U00000300", {847}, {3}}, + {L"\U0000034f\U0000200d", {847}, {2}}, + {L"\U0000034f\U00000308\U0000200d", {847}, {3}}, + {L"\U0000034f\U00000378", {847, 888}, {1, 2}}, + {L"\U0000034f\U00000308\U00000378", {847, 888}, {2, 3}}, + {L"\U0001f1e6\U00000020", {127462, 32}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000020", {127462, 32}, {3, 4}}, + {L"\U0001f1e6\U0000000d", {127462, 13}, {2, 3}}, + {L"\U0001f1e6\U00000308\U0000000d", {127462, 
13}, {3, 4}}, + {L"\U0001f1e6\U0000000a", {127462, 10}, {2, 3}}, + {L"\U0001f1e6\U00000308\U0000000a", {127462, 10}, {3, 4}}, + {L"\U0001f1e6\U00000001", {127462, 1}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000001", {127462, 1}, {3, 4}}, + {L"\U0001f1e6\U0000034f", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U0000034f", {127462}, {4}}, + {L"\U0001f1e6\U0001f1e6", {127462}, {4}}, + {L"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {3, 5}}, + {L"\U0001f1e6\U00000600", {127462, 1536}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000600", {127462, 1536}, {3, 4}}, + {L"\U0001f1e6\U00000903", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U00000903", {127462}, {4}}, + {L"\U0001f1e6\U00001100", {127462, 4352}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00001100", {127462, 4352}, {3, 4}}, + {L"\U0001f1e6\U00001160", {127462, 4448}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00001160", {127462, 4448}, {3, 4}}, + {L"\U0001f1e6\U000011a8", {127462, 4520}, {2, 3}}, + {L"\U0001f1e6\U00000308\U000011a8", {127462, 4520}, {3, 4}}, + {L"\U0001f1e6\U0000ac00", {127462, 44032}, {2, 3}}, + {L"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {3, 4}}, + {L"\U0001f1e6\U0000ac01", {127462, 44033}, {2, 3}}, + {L"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {3, 4}}, + {L"\U0001f1e6\U0000231a", {127462, 8986}, {2, 3}}, + {L"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {3, 4}}, + {L"\U0001f1e6\U00000300", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U00000300", {127462}, {4}}, + {L"\U0001f1e6\U0000200d", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U0000200d", {127462}, {4}}, + {L"\U0001f1e6\U00000378", {127462, 888}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000378", {127462, 888}, {3, 4}}, + {L"\U00000600\U00000020", {1536}, {2}}, + {L"\U00000600\U00000308\U00000020", {1536, 32}, {2, 3}}, + {L"\U00000600\U0000000d", {1536, 13}, {1, 2}}, + {L"\U00000600\U00000308\U0000000d", {1536, 13}, {2, 3}}, + {L"\U00000600\U0000000a", {1536, 10}, {1, 2}}, + {L"\U00000600\U00000308\U0000000a", {1536, 10}, {2, 3}}, + 
{L"\U00000600\U00000001", {1536, 1}, {1, 2}}, + {L"\U00000600\U00000308\U00000001", {1536, 1}, {2, 3}}, + {L"\U00000600\U0000034f", {1536}, {2}}, + {L"\U00000600\U00000308\U0000034f", {1536}, {3}}, + {L"\U00000600\U0001f1e6", {1536}, {3}}, + {L"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {2, 4}}, + {L"\U00000600\U00000600", {1536}, {2}}, + {L"\U00000600\U00000308\U00000600", {1536, 1536}, {2, 3}}, + {L"\U00000600\U00000903", {1536}, {2}}, + {L"\U00000600\U00000308\U00000903", {1536}, {3}}, + {L"\U00000600\U00001100", {1536}, {2}}, + {L"\U00000600\U00000308\U00001100", {1536, 4352}, {2, 3}}, + {L"\U00000600\U00001160", {1536}, {2}}, + {L"\U00000600\U00000308\U00001160", {1536, 4448}, {2, 3}}, + {L"\U00000600\U000011a8", {1536}, {2}}, + {L"\U00000600\U00000308\U000011a8", {1536, 4520}, {2, 3}}, + {L"\U00000600\U0000ac00", {1536}, {2}}, + {L"\U00000600\U00000308\U0000ac00", {1536, 44032}, {2, 3}}, + {L"\U00000600\U0000ac01", {1536}, {2}}, + {L"\U00000600\U00000308\U0000ac01", {1536, 44033}, {2, 3}}, + {L"\U00000600\U0000231a", {1536}, {2}}, + {L"\U00000600\U00000308\U0000231a", {1536, 8986}, {2, 3}}, + {L"\U00000600\U00000300", {1536}, {2}}, + {L"\U00000600\U00000308\U00000300", {1536}, {3}}, + {L"\U00000600\U0000200d", {1536}, {2}}, + {L"\U00000600\U00000308\U0000200d", {1536}, {3}}, + {L"\U00000600\U00000378", {1536}, {2}}, + {L"\U00000600\U00000308\U00000378", {1536, 888}, {2, 3}}, + {L"\U00000903\U00000020", {2307, 32}, {1, 2}}, + {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}}, + {L"\U00000903\U0000000d", {2307, 13}, {1, 2}}, + {L"\U00000903\U00000308\U0000000d", {2307, 13}, {2, 3}}, + {L"\U00000903\U0000000a", {2307, 10}, {1, 2}}, + {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}}, + {L"\U00000903\U00000001", {2307, 1}, {1, 2}}, + {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}}, + {L"\U00000903\U0000034f", {2307}, {2}}, + {L"\U00000903\U00000308\U0000034f", {2307}, {3}}, + {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 3}}, + 
{L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 4}}, + {L"\U00000903\U00000600", {2307, 1536}, {1, 2}}, + {L"\U00000903\U00000308\U00000600", {2307, 1536}, {2, 3}}, + {L"\U00000903\U00000903", {2307}, {2}}, + {L"\U00000903\U00000308\U00000903", {2307}, {3}}, + {L"\U00000903\U00001100", {2307, 4352}, {1, 2}}, + {L"\U00000903\U00000308\U00001100", {2307, 4352}, {2, 3}}, + {L"\U00000903\U00001160", {2307, 4448}, {1, 2}}, + {L"\U00000903\U00000308\U00001160", {2307, 4448}, {2, 3}}, + {L"\U00000903\U000011a8", {2307, 4520}, {1, 2}}, + {L"\U00000903\U00000308\U000011a8", {2307, 4520}, {2, 3}}, + {L"\U00000903\U0000ac00", {2307, 44032}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}}, + {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}}, + {L"\U00000903\U0000231a", {2307, 8986}, {1, 2}}, + {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}}, + {L"\U00000903\U00000300", {2307}, {2}}, + {L"\U00000903\U00000308\U00000300", {2307}, {3}}, + {L"\U00000903\U0000200d", {2307}, {2}}, + {L"\U00000903\U00000308\U0000200d", {2307}, {3}}, + {L"\U00000903\U00000378", {2307, 888}, {1, 2}}, + {L"\U00000903\U00000308\U00000378", {2307, 888}, {2, 3}}, + {L"\U00001100\U00000020", {4352, 32}, {1, 2}}, + {L"\U00001100\U00000308\U00000020", {4352, 32}, {2, 3}}, + {L"\U00001100\U0000000d", {4352, 13}, {1, 2}}, + {L"\U00001100\U00000308\U0000000d", {4352, 13}, {2, 3}}, + {L"\U00001100\U0000000a", {4352, 10}, {1, 2}}, + {L"\U00001100\U00000308\U0000000a", {4352, 10}, {2, 3}}, + {L"\U00001100\U00000001", {4352, 1}, {1, 2}}, + {L"\U00001100\U00000308\U00000001", {4352, 1}, {2, 3}}, + {L"\U00001100\U0000034f", {4352}, {2}}, + {L"\U00001100\U00000308\U0000034f", {4352}, {3}}, + {L"\U00001100\U0001f1e6", {4352, 127462}, {1, 3}}, + {L"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {2, 4}}, + {L"\U00001100\U00000600", {4352, 1536}, {1, 2}}, + {L"\U00001100\U00000308\U00000600", {4352, 1536}, {2, 3}}, + 
{L"\U00001100\U00000903", {4352}, {2}}, + {L"\U00001100\U00000308\U00000903", {4352}, {3}}, + {L"\U00001100\U00001100", {4352}, {2}}, + {L"\U00001100\U00000308\U00001100", {4352, 4352}, {2, 3}}, + {L"\U00001100\U00001160", {4352}, {2}}, + {L"\U00001100\U00000308\U00001160", {4352, 4448}, {2, 3}}, + {L"\U00001100\U000011a8", {4352, 4520}, {1, 2}}, + {L"\U00001100\U00000308\U000011a8", {4352, 4520}, {2, 3}}, + {L"\U00001100\U0000ac00", {4352}, {2}}, + {L"\U00001100\U00000308\U0000ac00", {4352, 44032}, {2, 3}}, + {L"\U00001100\U0000ac01", {4352}, {2}}, + {L"\U00001100\U00000308\U0000ac01", {4352, 44033}, {2, 3}}, + {L"\U00001100\U0000231a", {4352, 8986}, {1, 2}}, + {L"\U00001100\U00000308\U0000231a", {4352, 8986}, {2, 3}}, + {L"\U00001100\U00000300", {4352}, {2}}, + {L"\U00001100\U00000308\U00000300", {4352}, {3}}, + {L"\U00001100\U0000200d", {4352}, {2}}, + {L"\U00001100\U00000308\U0000200d", {4352}, {3}}, + {L"\U00001100\U00000378", {4352, 888}, {1, 2}}, + {L"\U00001100\U00000308\U00000378", {4352, 888}, {2, 3}}, + {L"\U00001160\U00000020", {4448, 32}, {1, 2}}, + {L"\U00001160\U00000308\U00000020", {4448, 32}, {2, 3}}, + {L"\U00001160\U0000000d", {4448, 13}, {1, 2}}, + {L"\U00001160\U00000308\U0000000d", {4448, 13}, {2, 3}}, + {L"\U00001160\U0000000a", {4448, 10}, {1, 2}}, + {L"\U00001160\U00000308\U0000000a", {4448, 10}, {2, 3}}, + {L"\U00001160\U00000001", {4448, 1}, {1, 2}}, + {L"\U00001160\U00000308\U00000001", {4448, 1}, {2, 3}}, + {L"\U00001160\U0000034f", {4448}, {2}}, + {L"\U00001160\U00000308\U0000034f", {4448}, {3}}, + {L"\U00001160\U0001f1e6", {4448, 127462}, {1, 3}}, + {L"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {2, 4}}, + {L"\U00001160\U00000600", {4448, 1536}, {1, 2}}, + {L"\U00001160\U00000308\U00000600", {4448, 1536}, {2, 3}}, + {L"\U00001160\U00000903", {4448}, {2}}, + {L"\U00001160\U00000308\U00000903", {4448}, {3}}, + {L"\U00001160\U00001100", {4448, 4352}, {1, 2}}, + {L"\U00001160\U00000308\U00001100", {4448, 4352}, {2, 3}}, + 
{L"\U00001160\U00001160", {4448}, {2}}, + {L"\U00001160\U00000308\U00001160", {4448, 4448}, {2, 3}}, + {L"\U00001160\U000011a8", {4448}, {2}}, + {L"\U00001160\U00000308\U000011a8", {4448, 4520}, {2, 3}}, + {L"\U00001160\U0000ac00", {4448, 44032}, {1, 2}}, + {L"\U00001160\U00000308\U0000ac00", {4448, 44032}, {2, 3}}, + {L"\U00001160\U0000ac01", {4448, 44033}, {1, 2}}, + {L"\U00001160\U00000308\U0000ac01", {4448, 44033}, {2, 3}}, + {L"\U00001160\U0000231a", {4448, 8986}, {1, 2}}, + {L"\U00001160\U00000308\U0000231a", {4448, 8986}, {2, 3}}, + {L"\U00001160\U00000300", {4448}, {2}}, + {L"\U00001160\U00000308\U00000300", {4448}, {3}}, + {L"\U00001160\U0000200d", {4448}, {2}}, + {L"\U00001160\U00000308\U0000200d", {4448}, {3}}, + {L"\U00001160\U00000378", {4448, 888}, {1, 2}}, + {L"\U00001160\U00000308\U00000378", {4448, 888}, {2, 3}}, + {L"\U000011a8\U00000020", {4520, 32}, {1, 2}}, + {L"\U000011a8\U00000308\U00000020", {4520, 32}, {2, 3}}, + {L"\U000011a8\U0000000d", {4520, 13}, {1, 2}}, + {L"\U000011a8\U00000308\U0000000d", {4520, 13}, {2, 3}}, + {L"\U000011a8\U0000000a", {4520, 10}, {1, 2}}, + {L"\U000011a8\U00000308\U0000000a", {4520, 10}, {2, 3}}, + {L"\U000011a8\U00000001", {4520, 1}, {1, 2}}, + {L"\U000011a8\U00000308\U00000001", {4520, 1}, {2, 3}}, + {L"\U000011a8\U0000034f", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000034f", {4520}, {3}}, + {L"\U000011a8\U0001f1e6", {4520, 127462}, {1, 3}}, + {L"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {2, 4}}, + {L"\U000011a8\U00000600", {4520, 1536}, {1, 2}}, + {L"\U000011a8\U00000308\U00000600", {4520, 1536}, {2, 3}}, + {L"\U000011a8\U00000903", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000903", {4520}, {3}}, + {L"\U000011a8\U00001100", {4520, 4352}, {1, 2}}, + {L"\U000011a8\U00000308\U00001100", {4520, 4352}, {2, 3}}, + {L"\U000011a8\U00001160", {4520, 4448}, {1, 2}}, + {L"\U000011a8\U00000308\U00001160", {4520, 4448}, {2, 3}}, + {L"\U000011a8\U000011a8", {4520}, {2}}, + {L"\U000011a8\U00000308\U000011a8", {4520, 
4520}, {2, 3}}, + {L"\U000011a8\U0000ac00", {4520, 44032}, {1, 2}}, + {L"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {2, 3}}, + {L"\U000011a8\U0000ac01", {4520, 44033}, {1, 2}}, + {L"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {2, 3}}, + {L"\U000011a8\U0000231a", {4520, 8986}, {1, 2}}, + {L"\U000011a8\U00000308\U0000231a", {4520, 8986}, {2, 3}}, + {L"\U000011a8\U00000300", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000300", {4520}, {3}}, + {L"\U000011a8\U0000200d", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000200d", {4520}, {3}}, + {L"\U000011a8\U00000378", {4520, 888}, {1, 2}}, + {L"\U000011a8\U00000308\U00000378", {4520, 888}, {2, 3}}, + {L"\U0000ac00\U00000020", {44032, 32}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000020", {44032, 32}, {2, 3}}, + {L"\U0000ac00\U0000000d", {44032, 13}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000000d", {44032, 13}, {2, 3}}, + {L"\U0000ac00\U0000000a", {44032, 10}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000000a", {44032, 10}, {2, 3}}, + {L"\U0000ac00\U00000001", {44032, 1}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000001", {44032, 1}, {2, 3}}, + {L"\U0000ac00\U0000034f", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000034f", {44032}, {3}}, + {L"\U0000ac00\U0001f1e6", {44032, 127462}, {1, 3}}, + {L"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {2, 4}}, + {L"\U0000ac00\U00000600", {44032, 1536}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000600", {44032, 1536}, {2, 3}}, + {L"\U0000ac00\U00000903", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000903", {44032}, {3}}, + {L"\U0000ac00\U00001100", {44032, 4352}, {1, 2}}, + {L"\U0000ac00\U00000308\U00001100", {44032, 4352}, {2, 3}}, + {L"\U0000ac00\U00001160", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00001160", {44032, 4448}, {2, 3}}, + {L"\U0000ac00\U000011a8", {44032}, {2}}, + {L"\U0000ac00\U00000308\U000011a8", {44032, 4520}, {2, 3}}, + {L"\U0000ac00\U0000ac00", {44032, 44032}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {2, 3}}, + {L"\U0000ac00\U0000ac01", {44032, 44033}, 
{1, 2}}, + {L"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {2, 3}}, + {L"\U0000ac00\U0000231a", {44032, 8986}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {2, 3}}, + {L"\U0000ac00\U00000300", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000300", {44032}, {3}}, + {L"\U0000ac00\U0000200d", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000200d", {44032}, {3}}, + {L"\U0000ac00\U00000378", {44032, 888}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000378", {44032, 888}, {2, 3}}, + {L"\U0000ac01\U00000020", {44033, 32}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000020", {44033, 32}, {2, 3}}, + {L"\U0000ac01\U0000000d", {44033, 13}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000000d", {44033, 13}, {2, 3}}, + {L"\U0000ac01\U0000000a", {44033, 10}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000000a", {44033, 10}, {2, 3}}, + {L"\U0000ac01\U00000001", {44033, 1}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000001", {44033, 1}, {2, 3}}, + {L"\U0000ac01\U0000034f", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000034f", {44033}, {3}}, + {L"\U0000ac01\U0001f1e6", {44033, 127462}, {1, 3}}, + {L"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {2, 4}}, + {L"\U0000ac01\U00000600", {44033, 1536}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000600", {44033, 1536}, {2, 3}}, + {L"\U0000ac01\U00000903", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000903", {44033}, {3}}, + {L"\U0000ac01\U00001100", {44033, 4352}, {1, 2}}, + {L"\U0000ac01\U00000308\U00001100", {44033, 4352}, {2, 3}}, + {L"\U0000ac01\U00001160", {44033, 4448}, {1, 2}}, + {L"\U0000ac01\U00000308\U00001160", {44033, 4448}, {2, 3}}, + {L"\U0000ac01\U000011a8", {44033}, {2}}, + {L"\U0000ac01\U00000308\U000011a8", {44033, 4520}, {2, 3}}, + {L"\U0000ac01\U0000ac00", {44033, 44032}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {2, 3}}, + {L"\U0000ac01\U0000ac01", {44033, 44033}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {2, 3}}, + {L"\U0000ac01\U0000231a", {44033, 8986}, {1, 2}}, + 
{L"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {2, 3}}, + {L"\U0000ac01\U00000300", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000300", {44033}, {3}}, + {L"\U0000ac01\U0000200d", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000200d", {44033}, {3}}, + {L"\U0000ac01\U00000378", {44033, 888}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000378", {44033, 888}, {2, 3}}, + {L"\U0000231a\U00000020", {8986, 32}, {1, 2}}, + {L"\U0000231a\U00000308\U00000020", {8986, 32}, {2, 3}}, + {L"\U0000231a\U0000000d", {8986, 13}, {1, 2}}, + {L"\U0000231a\U00000308\U0000000d", {8986, 13}, {2, 3}}, + {L"\U0000231a\U0000000a", {8986, 10}, {1, 2}}, + {L"\U0000231a\U00000308\U0000000a", {8986, 10}, {2, 3}}, + {L"\U0000231a\U00000001", {8986, 1}, {1, 2}}, + {L"\U0000231a\U00000308\U00000001", {8986, 1}, {2, 3}}, + {L"\U0000231a\U0000034f", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000034f", {8986}, {3}}, + {L"\U0000231a\U0001f1e6", {8986, 127462}, {1, 3}}, + {L"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {2, 4}}, + {L"\U0000231a\U00000600", {8986, 1536}, {1, 2}}, + {L"\U0000231a\U00000308\U00000600", {8986, 1536}, {2, 3}}, + {L"\U0000231a\U00000903", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000903", {8986}, {3}}, + {L"\U0000231a\U00001100", {8986, 4352}, {1, 2}}, + {L"\U0000231a\U00000308\U00001100", {8986, 4352}, {2, 3}}, + {L"\U0000231a\U00001160", {8986, 4448}, {1, 2}}, + {L"\U0000231a\U00000308\U00001160", {8986, 4448}, {2, 3}}, + {L"\U0000231a\U000011a8", {8986, 4520}, {1, 2}}, + {L"\U0000231a\U00000308\U000011a8", {8986, 4520}, {2, 3}}, + {L"\U0000231a\U0000ac00", {8986, 44032}, {1, 2}}, + {L"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {2, 3}}, + {L"\U0000231a\U0000ac01", {8986, 44033}, {1, 2}}, + {L"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {2, 3}}, + {L"\U0000231a\U0000231a", {8986, 8986}, {1, 2}}, + {L"\U0000231a\U00000308\U0000231a", {8986, 8986}, {2, 3}}, + {L"\U0000231a\U00000300", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000300", {8986}, {3}}, + 
{L"\U0000231a\U0000200d", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000200d", {8986}, {3}}, + {L"\U0000231a\U00000378", {8986, 888}, {1, 2}}, + {L"\U0000231a\U00000308\U00000378", {8986, 888}, {2, 3}}, + {L"\U00000300\U00000020", {768, 32}, {1, 2}}, + {L"\U00000300\U00000308\U00000020", {768, 32}, {2, 3}}, + {L"\U00000300\U0000000d", {768, 13}, {1, 2}}, + {L"\U00000300\U00000308\U0000000d", {768, 13}, {2, 3}}, + {L"\U00000300\U0000000a", {768, 10}, {1, 2}}, + {L"\U00000300\U00000308\U0000000a", {768, 10}, {2, 3}}, + {L"\U00000300\U00000001", {768, 1}, {1, 2}}, + {L"\U00000300\U00000308\U00000001", {768, 1}, {2, 3}}, + {L"\U00000300\U0000034f", {768}, {2}}, + {L"\U00000300\U00000308\U0000034f", {768}, {3}}, + {L"\U00000300\U0001f1e6", {768, 127462}, {1, 3}}, + {L"\U00000300\U00000308\U0001f1e6", {768, 127462}, {2, 4}}, + {L"\U00000300\U00000600", {768, 1536}, {1, 2}}, + {L"\U00000300\U00000308\U00000600", {768, 1536}, {2, 3}}, + {L"\U00000300\U00000903", {768}, {2}}, + {L"\U00000300\U00000308\U00000903", {768}, {3}}, + {L"\U00000300\U00001100", {768, 4352}, {1, 2}}, + {L"\U00000300\U00000308\U00001100", {768, 4352}, {2, 3}}, + {L"\U00000300\U00001160", {768, 4448}, {1, 2}}, + {L"\U00000300\U00000308\U00001160", {768, 4448}, {2, 3}}, + {L"\U00000300\U000011a8", {768, 4520}, {1, 2}}, + {L"\U00000300\U00000308\U000011a8", {768, 4520}, {2, 3}}, + {L"\U00000300\U0000ac00", {768, 44032}, {1, 2}}, + {L"\U00000300\U00000308\U0000ac00", {768, 44032}, {2, 3}}, + {L"\U00000300\U0000ac01", {768, 44033}, {1, 2}}, + {L"\U00000300\U00000308\U0000ac01", {768, 44033}, {2, 3}}, + {L"\U00000300\U0000231a", {768, 8986}, {1, 2}}, + {L"\U00000300\U00000308\U0000231a", {768, 8986}, {2, 3}}, + {L"\U00000300\U00000300", {768}, {2}}, + {L"\U00000300\U00000308\U00000300", {768}, {3}}, + {L"\U00000300\U0000200d", {768}, {2}}, + {L"\U00000300\U00000308\U0000200d", {768}, {3}}, + {L"\U00000300\U00000378", {768, 888}, {1, 2}}, + {L"\U00000300\U00000308\U00000378", {768, 888}, {2, 3}}, + 
{L"\U0000200d\U00000020", {8205, 32}, {1, 2}}, + {L"\U0000200d\U00000308\U00000020", {8205, 32}, {2, 3}}, + {L"\U0000200d\U0000000d", {8205, 13}, {1, 2}}, + {L"\U0000200d\U00000308\U0000000d", {8205, 13}, {2, 3}}, + {L"\U0000200d\U0000000a", {8205, 10}, {1, 2}}, + {L"\U0000200d\U00000308\U0000000a", {8205, 10}, {2, 3}}, + {L"\U0000200d\U00000001", {8205, 1}, {1, 2}}, + {L"\U0000200d\U00000308\U00000001", {8205, 1}, {2, 3}}, + {L"\U0000200d\U0000034f", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000034f", {8205}, {3}}, + {L"\U0000200d\U0001f1e6", {8205, 127462}, {1, 3}}, + {L"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {2, 4}}, + {L"\U0000200d\U00000600", {8205, 1536}, {1, 2}}, + {L"\U0000200d\U00000308\U00000600", {8205, 1536}, {2, 3}}, + {L"\U0000200d\U00000903", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000903", {8205}, {3}}, + {L"\U0000200d\U00001100", {8205, 4352}, {1, 2}}, + {L"\U0000200d\U00000308\U00001100", {8205, 4352}, {2, 3}}, + {L"\U0000200d\U00001160", {8205, 4448}, {1, 2}}, + {L"\U0000200d\U00000308\U00001160", {8205, 4448}, {2, 3}}, + {L"\U0000200d\U000011a8", {8205, 4520}, {1, 2}}, + {L"\U0000200d\U00000308\U000011a8", {8205, 4520}, {2, 3}}, + {L"\U0000200d\U0000ac00", {8205, 44032}, {1, 2}}, + {L"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {2, 3}}, + {L"\U0000200d\U0000ac01", {8205, 44033}, {1, 2}}, + {L"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {2, 3}}, + {L"\U0000200d\U0000231a", {8205, 8986}, {1, 2}}, + {L"\U0000200d\U00000308\U0000231a", {8205, 8986}, {2, 3}}, + {L"\U0000200d\U00000300", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000300", {8205}, {3}}, + {L"\U0000200d\U0000200d", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000200d", {8205}, {3}}, + {L"\U0000200d\U00000378", {8205, 888}, {1, 2}}, + {L"\U0000200d\U00000308\U00000378", {8205, 888}, {2, 3}}, + {L"\U00000378\U00000020", {888, 32}, {1, 2}}, + {L"\U00000378\U00000308\U00000020", {888, 32}, {2, 3}}, + {L"\U00000378\U0000000d", {888, 13}, {1, 2}}, + 
{L"\U00000378\U00000308\U0000000d", {888, 13}, {2, 3}}, + {L"\U00000378\U0000000a", {888, 10}, {1, 2}}, + {L"\U00000378\U00000308\U0000000a", {888, 10}, {2, 3}}, + {L"\U00000378\U00000001", {888, 1}, {1, 2}}, + {L"\U00000378\U00000308\U00000001", {888, 1}, {2, 3}}, + {L"\U00000378\U0000034f", {888}, {2}}, + {L"\U00000378\U00000308\U0000034f", {888}, {3}}, + {L"\U00000378\U0001f1e6", {888, 127462}, {1, 3}}, + {L"\U00000378\U00000308\U0001f1e6", {888, 127462}, {2, 4}}, + {L"\U00000378\U00000600", {888, 1536}, {1, 2}}, + {L"\U00000378\U00000308\U00000600", {888, 1536}, {2, 3}}, + {L"\U00000378\U00000903", {888}, {2}}, + {L"\U00000378\U00000308\U00000903", {888}, {3}}, + {L"\U00000378\U00001100", {888, 4352}, {1, 2}}, + {L"\U00000378\U00000308\U00001100", {888, 4352}, {2, 3}}, + {L"\U00000378\U00001160", {888, 4448}, {1, 2}}, + {L"\U00000378\U00000308\U00001160", {888, 4448}, {2, 3}}, + {L"\U00000378\U000011a8", {888, 4520}, {1, 2}}, + {L"\U00000378\U00000308\U000011a8", {888, 4520}, {2, 3}}, + {L"\U00000378\U0000ac00", {888, 44032}, {1, 2}}, + {L"\U00000378\U00000308\U0000ac00", {888, 44032}, {2, 3}}, + {L"\U00000378\U0000ac01", {888, 44033}, {1, 2}}, + {L"\U00000378\U00000308\U0000ac01", {888, 44033}, {2, 3}}, + {L"\U00000378\U0000231a", {888, 8986}, {1, 2}}, + {L"\U00000378\U00000308\U0000231a", {888, 8986}, {2, 3}}, + {L"\U00000378\U00000300", {888}, {2}}, + {L"\U00000378\U00000308\U00000300", {888}, {3}}, + {L"\U00000378\U0000200d", {888}, {2}}, + {L"\U00000378\U00000308\U0000200d", {888}, {3}}, + {L"\U00000378\U00000378", {888, 888}, {1, 2}}, + {L"\U00000378\U00000308\U00000378", {888, 888}, {2, 3}}, + {L"\U0000000d\U0000000a\U00000061\U0000000a\U00000308", {13, 97, 10, 776}, {2, 3, 4, 5}}, + {L"\U00000061\U00000308", {97}, {2}}, + {L"\U00000020\U0000200d\U00000646", {32, 1606}, {2, 3}}, + {L"\U00000646\U0000200d\U00000020", {1606, 32}, {2, 3}}, + {L"\U00001100\U00001100", {4352}, {2}}, + {L"\U0000ac00\U000011a8\U00001100", {44032, 4352}, {2, 3}}, + 
{L"\U0000ac01\U000011a8\U00001100", {44033, 4352}, {2, 3}}, + {L"\U0001f1e6\U0001f1e7\U0001f1e8\U00000062", {127462, 127464, 98}, {4, 6, 7}}, + {L"\U00000061\U0001f1e6\U0001f1e7\U0001f1e8\U00000062", {97, 127462, 127464, 98}, {1, 5, 7, 8}}, + {L"\U00000061\U0001f1e6\U0001f1e7\U0000200d\U0001f1e8\U00000062", {97, 127462, 127464, 98}, {1, 6, 8, 9}}, + {L"\U00000061\U0001f1e6\U0000200d\U0001f1e7\U0001f1e8\U00000062", {97, 127462, 127463, 98}, {1, 4, 8, 9}}, + {L"\U00000061\U0001f1e6\U0001f1e7\U0001f1e8\U0001f1e9\U00000062", {97, 127462, 127464, 98}, {1, 5, 9, 10}}, + {L"\U00000061\U0000200d", {97}, {2}}, + {L"\U00000061\U00000308\U00000062", {97, 98}, {2, 3}}, + {L"\U00000061\U00000903\U00000062", {97, 98}, {2, 3}}, + {L"\U00000061\U00000600\U00000062", {97, 1536}, {1, 3}}, + {L"\U0001f476\U0001f3ff\U0001f476", {128118, 128118}, {4, 6}}, + {L"\U00000061\U0001f3ff\U0001f476", {97, 128118}, {3, 5}}, + {L"\U00000061\U0001f3ff\U0001f476\U0000200d\U0001f6d1", {97, 128118}, {3, 8}}, + {L"\U0001f476\U0001f3ff\U00000308\U0000200d\U0001f476\U0001f3ff", {128118}, {10}}, + {L"\U0001f6d1\U0000200d\U0001f6d1", {128721}, {5}}, + {L"\U00000061\U0000200d\U0001f6d1", {97, 128721}, {2, 4}}, + {L"\U00002701\U0000200d\U00002701", {9985}, {3}}, + {L"\U00000061\U0000200d\U00002701", {97, 9985}, {2, 3}}}}; + +/// The data for UTF-32. +/// +/// Note that most of the data for UTF-16 and UTF-32 is identical. However, +/// since the size of the code units differs, the breaks can contain different +/// values. 
+std::array, 602> data_utf32 = { + {{L"\U00000020\U00000020", {32, 32}, {1, 2}}, + {L"\U00000020\U00000308\U00000020", {32, 32}, {2, 3}}, + {L"\U00000020\U0000000d", {32, 13}, {1, 2}}, + {L"\U00000020\U00000308\U0000000d", {32, 13}, {2, 3}}, + {L"\U00000020\U0000000a", {32, 10}, {1, 2}}, + {L"\U00000020\U00000308\U0000000a", {32, 10}, {2, 3}}, + {L"\U00000020\U00000001", {32, 1}, {1, 2}}, + {L"\U00000020\U00000308\U00000001", {32, 1}, {2, 3}}, + {L"\U00000020\U0000034f", {32}, {2}}, + {L"\U00000020\U00000308\U0000034f", {32}, {3}}, + {L"\U00000020\U0001f1e6", {32, 127462}, {1, 2}}, + {L"\U00000020\U00000308\U0001f1e6", {32, 127462}, {2, 3}}, + {L"\U00000020\U00000600", {32, 1536}, {1, 2}}, + {L"\U00000020\U00000308\U00000600", {32, 1536}, {2, 3}}, + {L"\U00000020\U00000903", {32}, {2}}, + {L"\U00000020\U00000308\U00000903", {32}, {3}}, + {L"\U00000020\U00001100", {32, 4352}, {1, 2}}, + {L"\U00000020\U00000308\U00001100", {32, 4352}, {2, 3}}, + {L"\U00000020\U00001160", {32, 4448}, {1, 2}}, + {L"\U00000020\U00000308\U00001160", {32, 4448}, {2, 3}}, + {L"\U00000020\U000011a8", {32, 4520}, {1, 2}}, + {L"\U00000020\U00000308\U000011a8", {32, 4520}, {2, 3}}, + {L"\U00000020\U0000ac00", {32, 44032}, {1, 2}}, + {L"\U00000020\U00000308\U0000ac00", {32, 44032}, {2, 3}}, + {L"\U00000020\U0000ac01", {32, 44033}, {1, 2}}, + {L"\U00000020\U00000308\U0000ac01", {32, 44033}, {2, 3}}, + {L"\U00000020\U0000231a", {32, 8986}, {1, 2}}, + {L"\U00000020\U00000308\U0000231a", {32, 8986}, {2, 3}}, + {L"\U00000020\U00000300", {32}, {2}}, + {L"\U00000020\U00000308\U00000300", {32}, {3}}, + {L"\U00000020\U0000200d", {32}, {2}}, + {L"\U00000020\U00000308\U0000200d", {32}, {3}}, + {L"\U00000020\U00000378", {32, 888}, {1, 2}}, + {L"\U00000020\U00000308\U00000378", {32, 888}, {2, 3}}, + {L"\U0000000d\U00000020", {13, 32}, {1, 2}}, + {L"\U0000000d\U00000308\U00000020", {13, 776, 32}, {1, 2, 3}}, + {L"\U0000000d\U0000000d", {13, 13}, {1, 2}}, + {L"\U0000000d\U00000308\U0000000d", {13, 776, 13}, 
{1, 2, 3}}, + {L"\U0000000d\U0000000a", {13}, {2}}, + {L"\U0000000d\U00000308\U0000000a", {13, 776, 10}, {1, 2, 3}}, + {L"\U0000000d\U00000001", {13, 1}, {1, 2}}, + {L"\U0000000d\U00000308\U00000001", {13, 776, 1}, {1, 2, 3}}, + {L"\U0000000d\U0000034f", {13, 847}, {1, 2}}, + {L"\U0000000d\U00000308\U0000034f", {13, 776}, {1, 3}}, + {L"\U0000000d\U0001f1e6", {13, 127462}, {1, 2}}, + {L"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 2, 3}}, + {L"\U0000000d\U00000600", {13, 1536}, {1, 2}}, + {L"\U0000000d\U00000308\U00000600", {13, 776, 1536}, {1, 2, 3}}, + {L"\U0000000d\U00000903", {13, 2307}, {1, 2}}, + {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}}, + {L"\U0000000d\U00001100", {13, 4352}, {1, 2}}, + {L"\U0000000d\U00000308\U00001100", {13, 776, 4352}, {1, 2, 3}}, + {L"\U0000000d\U00001160", {13, 4448}, {1, 2}}, + {L"\U0000000d\U00000308\U00001160", {13, 776, 4448}, {1, 2, 3}}, + {L"\U0000000d\U000011a8", {13, 4520}, {1, 2}}, + {L"\U0000000d\U00000308\U000011a8", {13, 776, 4520}, {1, 2, 3}}, + {L"\U0000000d\U0000ac00", {13, 44032}, {1, 2}}, + {L"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 2, 3}}, + {L"\U0000000d\U0000ac01", {13, 44033}, {1, 2}}, + {L"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 2, 3}}, + {L"\U0000000d\U0000231a", {13, 8986}, {1, 2}}, + {L"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 2, 3}}, + {L"\U0000000d\U00000300", {13, 768}, {1, 2}}, + {L"\U0000000d\U00000308\U00000300", {13, 776}, {1, 3}}, + {L"\U0000000d\U0000200d", {13, 8205}, {1, 2}}, + {L"\U0000000d\U00000308\U0000200d", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000378", {13, 888}, {1, 2}}, + {L"\U0000000d\U00000308\U00000378", {13, 776, 888}, {1, 2, 3}}, + {L"\U0000000a\U00000020", {10, 32}, {1, 2}}, + {L"\U0000000a\U00000308\U00000020", {10, 776, 32}, {1, 2, 3}}, + {L"\U0000000a\U0000000d", {10, 13}, {1, 2}}, + {L"\U0000000a\U00000308\U0000000d", {10, 776, 13}, {1, 2, 3}}, + {L"\U0000000a\U0000000a", {10, 10}, {1, 2}}, + 
{L"\U0000000a\U00000308\U0000000a", {10, 776, 10}, {1, 2, 3}}, + {L"\U0000000a\U00000001", {10, 1}, {1, 2}}, + {L"\U0000000a\U00000308\U00000001", {10, 776, 1}, {1, 2, 3}}, + {L"\U0000000a\U0000034f", {10, 847}, {1, 2}}, + {L"\U0000000a\U00000308\U0000034f", {10, 776}, {1, 3}}, + {L"\U0000000a\U0001f1e6", {10, 127462}, {1, 2}}, + {L"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 2, 3}}, + {L"\U0000000a\U00000600", {10, 1536}, {1, 2}}, + {L"\U0000000a\U00000308\U00000600", {10, 776, 1536}, {1, 2, 3}}, + {L"\U0000000a\U00000903", {10, 2307}, {1, 2}}, + {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}}, + {L"\U0000000a\U00001100", {10, 4352}, {1, 2}}, + {L"\U0000000a\U00000308\U00001100", {10, 776, 4352}, {1, 2, 3}}, + {L"\U0000000a\U00001160", {10, 4448}, {1, 2}}, + {L"\U0000000a\U00000308\U00001160", {10, 776, 4448}, {1, 2, 3}}, + {L"\U0000000a\U000011a8", {10, 4520}, {1, 2}}, + {L"\U0000000a\U00000308\U000011a8", {10, 776, 4520}, {1, 2, 3}}, + {L"\U0000000a\U0000ac00", {10, 44032}, {1, 2}}, + {L"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 2, 3}}, + {L"\U0000000a\U0000ac01", {10, 44033}, {1, 2}}, + {L"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 2, 3}}, + {L"\U0000000a\U0000231a", {10, 8986}, {1, 2}}, + {L"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 2, 3}}, + {L"\U0000000a\U00000300", {10, 768}, {1, 2}}, + {L"\U0000000a\U00000308\U00000300", {10, 776}, {1, 3}}, + {L"\U0000000a\U0000200d", {10, 8205}, {1, 2}}, + {L"\U0000000a\U00000308\U0000200d", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000378", {10, 888}, {1, 2}}, + {L"\U0000000a\U00000308\U00000378", {10, 776, 888}, {1, 2, 3}}, + {L"\U00000001\U00000020", {1, 32}, {1, 2}}, + {L"\U00000001\U00000308\U00000020", {1, 776, 32}, {1, 2, 3}}, + {L"\U00000001\U0000000d", {1, 13}, {1, 2}}, + {L"\U00000001\U00000308\U0000000d", {1, 776, 13}, {1, 2, 3}}, + {L"\U00000001\U0000000a", {1, 10}, {1, 2}}, + {L"\U00000001\U00000308\U0000000a", {1, 776, 10}, {1, 2, 3}}, + 
{L"\U00000001\U00000001", {1, 1}, {1, 2}}, + {L"\U00000001\U00000308\U00000001", {1, 776, 1}, {1, 2, 3}}, + {L"\U00000001\U0000034f", {1, 847}, {1, 2}}, + {L"\U00000001\U00000308\U0000034f", {1, 776}, {1, 3}}, + {L"\U00000001\U0001f1e6", {1, 127462}, {1, 2}}, + {L"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 2, 3}}, + {L"\U00000001\U00000600", {1, 1536}, {1, 2}}, + {L"\U00000001\U00000308\U00000600", {1, 776, 1536}, {1, 2, 3}}, + {L"\U00000001\U00000903", {1, 2307}, {1, 2}}, + {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}}, + {L"\U00000001\U00001100", {1, 4352}, {1, 2}}, + {L"\U00000001\U00000308\U00001100", {1, 776, 4352}, {1, 2, 3}}, + {L"\U00000001\U00001160", {1, 4448}, {1, 2}}, + {L"\U00000001\U00000308\U00001160", {1, 776, 4448}, {1, 2, 3}}, + {L"\U00000001\U000011a8", {1, 4520}, {1, 2}}, + {L"\U00000001\U00000308\U000011a8", {1, 776, 4520}, {1, 2, 3}}, + {L"\U00000001\U0000ac00", {1, 44032}, {1, 2}}, + {L"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 2, 3}}, + {L"\U00000001\U0000ac01", {1, 44033}, {1, 2}}, + {L"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 2, 3}}, + {L"\U00000001\U0000231a", {1, 8986}, {1, 2}}, + {L"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 2, 3}}, + {L"\U00000001\U00000300", {1, 768}, {1, 2}}, + {L"\U00000001\U00000308\U00000300", {1, 776}, {1, 3}}, + {L"\U00000001\U0000200d", {1, 8205}, {1, 2}}, + {L"\U00000001\U00000308\U0000200d", {1, 776}, {1, 3}}, + {L"\U00000001\U00000378", {1, 888}, {1, 2}}, + {L"\U00000001\U00000308\U00000378", {1, 776, 888}, {1, 2, 3}}, + {L"\U0000034f\U00000020", {847, 32}, {1, 2}}, + {L"\U0000034f\U00000308\U00000020", {847, 32}, {2, 3}}, + {L"\U0000034f\U0000000d", {847, 13}, {1, 2}}, + {L"\U0000034f\U00000308\U0000000d", {847, 13}, {2, 3}}, + {L"\U0000034f\U0000000a", {847, 10}, {1, 2}}, + {L"\U0000034f\U00000308\U0000000a", {847, 10}, {2, 3}}, + {L"\U0000034f\U00000001", {847, 1}, {1, 2}}, + {L"\U0000034f\U00000308\U00000001", {847, 1}, {2, 3}}, + 
{L"\U0000034f\U0000034f", {847}, {2}}, + {L"\U0000034f\U00000308\U0000034f", {847}, {3}}, + {L"\U0000034f\U0001f1e6", {847, 127462}, {1, 2}}, + {L"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {2, 3}}, + {L"\U0000034f\U00000600", {847, 1536}, {1, 2}}, + {L"\U0000034f\U00000308\U00000600", {847, 1536}, {2, 3}}, + {L"\U0000034f\U00000903", {847}, {2}}, + {L"\U0000034f\U00000308\U00000903", {847}, {3}}, + {L"\U0000034f\U00001100", {847, 4352}, {1, 2}}, + {L"\U0000034f\U00000308\U00001100", {847, 4352}, {2, 3}}, + {L"\U0000034f\U00001160", {847, 4448}, {1, 2}}, + {L"\U0000034f\U00000308\U00001160", {847, 4448}, {2, 3}}, + {L"\U0000034f\U000011a8", {847, 4520}, {1, 2}}, + {L"\U0000034f\U00000308\U000011a8", {847, 4520}, {2, 3}}, + {L"\U0000034f\U0000ac00", {847, 44032}, {1, 2}}, + {L"\U0000034f\U00000308\U0000ac00", {847, 44032}, {2, 3}}, + {L"\U0000034f\U0000ac01", {847, 44033}, {1, 2}}, + {L"\U0000034f\U00000308\U0000ac01", {847, 44033}, {2, 3}}, + {L"\U0000034f\U0000231a", {847, 8986}, {1, 2}}, + {L"\U0000034f\U00000308\U0000231a", {847, 8986}, {2, 3}}, + {L"\U0000034f\U00000300", {847}, {2}}, + {L"\U0000034f\U00000308\U00000300", {847}, {3}}, + {L"\U0000034f\U0000200d", {847}, {2}}, + {L"\U0000034f\U00000308\U0000200d", {847}, {3}}, + {L"\U0000034f\U00000378", {847, 888}, {1, 2}}, + {L"\U0000034f\U00000308\U00000378", {847, 888}, {2, 3}}, + {L"\U0001f1e6\U00000020", {127462, 32}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000020", {127462, 32}, {2, 3}}, + {L"\U0001f1e6\U0000000d", {127462, 13}, {1, 2}}, + {L"\U0001f1e6\U00000308\U0000000d", {127462, 13}, {2, 3}}, + {L"\U0001f1e6\U0000000a", {127462, 10}, {1, 2}}, + {L"\U0001f1e6\U00000308\U0000000a", {127462, 10}, {2, 3}}, + {L"\U0001f1e6\U00000001", {127462, 1}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000001", {127462, 1}, {2, 3}}, + {L"\U0001f1e6\U0000034f", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U0000034f", {127462}, {3}}, + {L"\U0001f1e6\U0001f1e6", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U0001f1e6", {127462, 
127462}, {2, 3}}, + {L"\U0001f1e6\U00000600", {127462, 1536}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000600", {127462, 1536}, {2, 3}}, + {L"\U0001f1e6\U00000903", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U00000903", {127462}, {3}}, + {L"\U0001f1e6\U00001100", {127462, 4352}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00001100", {127462, 4352}, {2, 3}}, + {L"\U0001f1e6\U00001160", {127462, 4448}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00001160", {127462, 4448}, {2, 3}}, + {L"\U0001f1e6\U000011a8", {127462, 4520}, {1, 2}}, + {L"\U0001f1e6\U00000308\U000011a8", {127462, 4520}, {2, 3}}, + {L"\U0001f1e6\U0000ac00", {127462, 44032}, {1, 2}}, + {L"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {2, 3}}, + {L"\U0001f1e6\U0000ac01", {127462, 44033}, {1, 2}}, + {L"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {2, 3}}, + {L"\U0001f1e6\U0000231a", {127462, 8986}, {1, 2}}, + {L"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {2, 3}}, + {L"\U0001f1e6\U00000300", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U00000300", {127462}, {3}}, + {L"\U0001f1e6\U0000200d", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U0000200d", {127462}, {3}}, + {L"\U0001f1e6\U00000378", {127462, 888}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000378", {127462, 888}, {2, 3}}, + {L"\U00000600\U00000020", {1536}, {2}}, + {L"\U00000600\U00000308\U00000020", {1536, 32}, {2, 3}}, + {L"\U00000600\U0000000d", {1536, 13}, {1, 2}}, + {L"\U00000600\U00000308\U0000000d", {1536, 13}, {2, 3}}, + {L"\U00000600\U0000000a", {1536, 10}, {1, 2}}, + {L"\U00000600\U00000308\U0000000a", {1536, 10}, {2, 3}}, + {L"\U00000600\U00000001", {1536, 1}, {1, 2}}, + {L"\U00000600\U00000308\U00000001", {1536, 1}, {2, 3}}, + {L"\U00000600\U0000034f", {1536}, {2}}, + {L"\U00000600\U00000308\U0000034f", {1536}, {3}}, + {L"\U00000600\U0001f1e6", {1536}, {2}}, + {L"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {2, 3}}, + {L"\U00000600\U00000600", {1536}, {2}}, + {L"\U00000600\U00000308\U00000600", {1536, 1536}, {2, 3}}, + {L"\U00000600\U00000903", 
{1536}, {2}}, + {L"\U00000600\U00000308\U00000903", {1536}, {3}}, + {L"\U00000600\U00001100", {1536}, {2}}, + {L"\U00000600\U00000308\U00001100", {1536, 4352}, {2, 3}}, + {L"\U00000600\U00001160", {1536}, {2}}, + {L"\U00000600\U00000308\U00001160", {1536, 4448}, {2, 3}}, + {L"\U00000600\U000011a8", {1536}, {2}}, + {L"\U00000600\U00000308\U000011a8", {1536, 4520}, {2, 3}}, + {L"\U00000600\U0000ac00", {1536}, {2}}, + {L"\U00000600\U00000308\U0000ac00", {1536, 44032}, {2, 3}}, + {L"\U00000600\U0000ac01", {1536}, {2}}, + {L"\U00000600\U00000308\U0000ac01", {1536, 44033}, {2, 3}}, + {L"\U00000600\U0000231a", {1536}, {2}}, + {L"\U00000600\U00000308\U0000231a", {1536, 8986}, {2, 3}}, + {L"\U00000600\U00000300", {1536}, {2}}, + {L"\U00000600\U00000308\U00000300", {1536}, {3}}, + {L"\U00000600\U0000200d", {1536}, {2}}, + {L"\U00000600\U00000308\U0000200d", {1536}, {3}}, + {L"\U00000600\U00000378", {1536}, {2}}, + {L"\U00000600\U00000308\U00000378", {1536, 888}, {2, 3}}, + {L"\U00000903\U00000020", {2307, 32}, {1, 2}}, + {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}}, + {L"\U00000903\U0000000d", {2307, 13}, {1, 2}}, + {L"\U00000903\U00000308\U0000000d", {2307, 13}, {2, 3}}, + {L"\U00000903\U0000000a", {2307, 10}, {1, 2}}, + {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}}, + {L"\U00000903\U00000001", {2307, 1}, {1, 2}}, + {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}}, + {L"\U00000903\U0000034f", {2307}, {2}}, + {L"\U00000903\U00000308\U0000034f", {2307}, {3}}, + {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 2}}, + {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 3}}, + {L"\U00000903\U00000600", {2307, 1536}, {1, 2}}, + {L"\U00000903\U00000308\U00000600", {2307, 1536}, {2, 3}}, + {L"\U00000903\U00000903", {2307}, {2}}, + {L"\U00000903\U00000308\U00000903", {2307}, {3}}, + {L"\U00000903\U00001100", {2307, 4352}, {1, 2}}, + {L"\U00000903\U00000308\U00001100", {2307, 4352}, {2, 3}}, + {L"\U00000903\U00001160", {2307, 4448}, {1, 2}}, + 
{L"\U00000903\U00000308\U00001160", {2307, 4448}, {2, 3}}, + {L"\U00000903\U000011a8", {2307, 4520}, {1, 2}}, + {L"\U00000903\U00000308\U000011a8", {2307, 4520}, {2, 3}}, + {L"\U00000903\U0000ac00", {2307, 44032}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}}, + {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}}, + {L"\U00000903\U0000231a", {2307, 8986}, {1, 2}}, + {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}}, + {L"\U00000903\U00000300", {2307}, {2}}, + {L"\U00000903\U00000308\U00000300", {2307}, {3}}, + {L"\U00000903\U0000200d", {2307}, {2}}, + {L"\U00000903\U00000308\U0000200d", {2307}, {3}}, + {L"\U00000903\U00000378", {2307, 888}, {1, 2}}, + {L"\U00000903\U00000308\U00000378", {2307, 888}, {2, 3}}, + {L"\U00001100\U00000020", {4352, 32}, {1, 2}}, + {L"\U00001100\U00000308\U00000020", {4352, 32}, {2, 3}}, + {L"\U00001100\U0000000d", {4352, 13}, {1, 2}}, + {L"\U00001100\U00000308\U0000000d", {4352, 13}, {2, 3}}, + {L"\U00001100\U0000000a", {4352, 10}, {1, 2}}, + {L"\U00001100\U00000308\U0000000a", {4352, 10}, {2, 3}}, + {L"\U00001100\U00000001", {4352, 1}, {1, 2}}, + {L"\U00001100\U00000308\U00000001", {4352, 1}, {2, 3}}, + {L"\U00001100\U0000034f", {4352}, {2}}, + {L"\U00001100\U00000308\U0000034f", {4352}, {3}}, + {L"\U00001100\U0001f1e6", {4352, 127462}, {1, 2}}, + {L"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {2, 3}}, + {L"\U00001100\U00000600", {4352, 1536}, {1, 2}}, + {L"\U00001100\U00000308\U00000600", {4352, 1536}, {2, 3}}, + {L"\U00001100\U00000903", {4352}, {2}}, + {L"\U00001100\U00000308\U00000903", {4352}, {3}}, + {L"\U00001100\U00001100", {4352}, {2}}, + {L"\U00001100\U00000308\U00001100", {4352, 4352}, {2, 3}}, + {L"\U00001100\U00001160", {4352}, {2}}, + {L"\U00001100\U00000308\U00001160", {4352, 4448}, {2, 3}}, + {L"\U00001100\U000011a8", {4352, 4520}, {1, 2}}, + {L"\U00001100\U00000308\U000011a8", {4352, 4520}, {2, 3}}, + 
{L"\U00001100\U0000ac00", {4352}, {2}}, + {L"\U00001100\U00000308\U0000ac00", {4352, 44032}, {2, 3}}, + {L"\U00001100\U0000ac01", {4352}, {2}}, + {L"\U00001100\U00000308\U0000ac01", {4352, 44033}, {2, 3}}, + {L"\U00001100\U0000231a", {4352, 8986}, {1, 2}}, + {L"\U00001100\U00000308\U0000231a", {4352, 8986}, {2, 3}}, + {L"\U00001100\U00000300", {4352}, {2}}, + {L"\U00001100\U00000308\U00000300", {4352}, {3}}, + {L"\U00001100\U0000200d", {4352}, {2}}, + {L"\U00001100\U00000308\U0000200d", {4352}, {3}}, + {L"\U00001100\U00000378", {4352, 888}, {1, 2}}, + {L"\U00001100\U00000308\U00000378", {4352, 888}, {2, 3}}, + {L"\U00001160\U00000020", {4448, 32}, {1, 2}}, + {L"\U00001160\U00000308\U00000020", {4448, 32}, {2, 3}}, + {L"\U00001160\U0000000d", {4448, 13}, {1, 2}}, + {L"\U00001160\U00000308\U0000000d", {4448, 13}, {2, 3}}, + {L"\U00001160\U0000000a", {4448, 10}, {1, 2}}, + {L"\U00001160\U00000308\U0000000a", {4448, 10}, {2, 3}}, + {L"\U00001160\U00000001", {4448, 1}, {1, 2}}, + {L"\U00001160\U00000308\U00000001", {4448, 1}, {2, 3}}, + {L"\U00001160\U0000034f", {4448}, {2}}, + {L"\U00001160\U00000308\U0000034f", {4448}, {3}}, + {L"\U00001160\U0001f1e6", {4448, 127462}, {1, 2}}, + {L"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {2, 3}}, + {L"\U00001160\U00000600", {4448, 1536}, {1, 2}}, + {L"\U00001160\U00000308\U00000600", {4448, 1536}, {2, 3}}, + {L"\U00001160\U00000903", {4448}, {2}}, + {L"\U00001160\U00000308\U00000903", {4448}, {3}}, + {L"\U00001160\U00001100", {4448, 4352}, {1, 2}}, + {L"\U00001160\U00000308\U00001100", {4448, 4352}, {2, 3}}, + {L"\U00001160\U00001160", {4448}, {2}}, + {L"\U00001160\U00000308\U00001160", {4448, 4448}, {2, 3}}, + {L"\U00001160\U000011a8", {4448}, {2}}, + {L"\U00001160\U00000308\U000011a8", {4448, 4520}, {2, 3}}, + {L"\U00001160\U0000ac00", {4448, 44032}, {1, 2}}, + {L"\U00001160\U00000308\U0000ac00", {4448, 44032}, {2, 3}}, + {L"\U00001160\U0000ac01", {4448, 44033}, {1, 2}}, + {L"\U00001160\U00000308\U0000ac01", {4448, 44033}, 
{2, 3}}, + {L"\U00001160\U0000231a", {4448, 8986}, {1, 2}}, + {L"\U00001160\U00000308\U0000231a", {4448, 8986}, {2, 3}}, + {L"\U00001160\U00000300", {4448}, {2}}, + {L"\U00001160\U00000308\U00000300", {4448}, {3}}, + {L"\U00001160\U0000200d", {4448}, {2}}, + {L"\U00001160\U00000308\U0000200d", {4448}, {3}}, + {L"\U00001160\U00000378", {4448, 888}, {1, 2}}, + {L"\U00001160\U00000308\U00000378", {4448, 888}, {2, 3}}, + {L"\U000011a8\U00000020", {4520, 32}, {1, 2}}, + {L"\U000011a8\U00000308\U00000020", {4520, 32}, {2, 3}}, + {L"\U000011a8\U0000000d", {4520, 13}, {1, 2}}, + {L"\U000011a8\U00000308\U0000000d", {4520, 13}, {2, 3}}, + {L"\U000011a8\U0000000a", {4520, 10}, {1, 2}}, + {L"\U000011a8\U00000308\U0000000a", {4520, 10}, {2, 3}}, + {L"\U000011a8\U00000001", {4520, 1}, {1, 2}}, + {L"\U000011a8\U00000308\U00000001", {4520, 1}, {2, 3}}, + {L"\U000011a8\U0000034f", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000034f", {4520}, {3}}, + {L"\U000011a8\U0001f1e6", {4520, 127462}, {1, 2}}, + {L"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {2, 3}}, + {L"\U000011a8\U00000600", {4520, 1536}, {1, 2}}, + {L"\U000011a8\U00000308\U00000600", {4520, 1536}, {2, 3}}, + {L"\U000011a8\U00000903", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000903", {4520}, {3}}, + {L"\U000011a8\U00001100", {4520, 4352}, {1, 2}}, + {L"\U000011a8\U00000308\U00001100", {4520, 4352}, {2, 3}}, + {L"\U000011a8\U00001160", {4520, 4448}, {1, 2}}, + {L"\U000011a8\U00000308\U00001160", {4520, 4448}, {2, 3}}, + {L"\U000011a8\U000011a8", {4520}, {2}}, + {L"\U000011a8\U00000308\U000011a8", {4520, 4520}, {2, 3}}, + {L"\U000011a8\U0000ac00", {4520, 44032}, {1, 2}}, + {L"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {2, 3}}, + {L"\U000011a8\U0000ac01", {4520, 44033}, {1, 2}}, + {L"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {2, 3}}, + {L"\U000011a8\U0000231a", {4520, 8986}, {1, 2}}, + {L"\U000011a8\U00000308\U0000231a", {4520, 8986}, {2, 3}}, + {L"\U000011a8\U00000300", {4520}, {2}}, + 
{L"\U000011a8\U00000308\U00000300", {4520}, {3}}, + {L"\U000011a8\U0000200d", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000200d", {4520}, {3}}, + {L"\U000011a8\U00000378", {4520, 888}, {1, 2}}, + {L"\U000011a8\U00000308\U00000378", {4520, 888}, {2, 3}}, + {L"\U0000ac00\U00000020", {44032, 32}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000020", {44032, 32}, {2, 3}}, + {L"\U0000ac00\U0000000d", {44032, 13}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000000d", {44032, 13}, {2, 3}}, + {L"\U0000ac00\U0000000a", {44032, 10}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000000a", {44032, 10}, {2, 3}}, + {L"\U0000ac00\U00000001", {44032, 1}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000001", {44032, 1}, {2, 3}}, + {L"\U0000ac00\U0000034f", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000034f", {44032}, {3}}, + {L"\U0000ac00\U0001f1e6", {44032, 127462}, {1, 2}}, + {L"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {2, 3}}, + {L"\U0000ac00\U00000600", {44032, 1536}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000600", {44032, 1536}, {2, 3}}, + {L"\U0000ac00\U00000903", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000903", {44032}, {3}}, + {L"\U0000ac00\U00001100", {44032, 4352}, {1, 2}}, + {L"\U0000ac00\U00000308\U00001100", {44032, 4352}, {2, 3}}, + {L"\U0000ac00\U00001160", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00001160", {44032, 4448}, {2, 3}}, + {L"\U0000ac00\U000011a8", {44032}, {2}}, + {L"\U0000ac00\U00000308\U000011a8", {44032, 4520}, {2, 3}}, + {L"\U0000ac00\U0000ac00", {44032, 44032}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {2, 3}}, + {L"\U0000ac00\U0000ac01", {44032, 44033}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {2, 3}}, + {L"\U0000ac00\U0000231a", {44032, 8986}, {1, 2}}, + {L"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {2, 3}}, + {L"\U0000ac00\U00000300", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000300", {44032}, {3}}, + {L"\U0000ac00\U0000200d", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000200d", {44032}, {3}}, + 
{L"\U0000ac00\U00000378", {44032, 888}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000378", {44032, 888}, {2, 3}}, + {L"\U0000ac01\U00000020", {44033, 32}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000020", {44033, 32}, {2, 3}}, + {L"\U0000ac01\U0000000d", {44033, 13}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000000d", {44033, 13}, {2, 3}}, + {L"\U0000ac01\U0000000a", {44033, 10}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000000a", {44033, 10}, {2, 3}}, + {L"\U0000ac01\U00000001", {44033, 1}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000001", {44033, 1}, {2, 3}}, + {L"\U0000ac01\U0000034f", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000034f", {44033}, {3}}, + {L"\U0000ac01\U0001f1e6", {44033, 127462}, {1, 2}}, + {L"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {2, 3}}, + {L"\U0000ac01\U00000600", {44033, 1536}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000600", {44033, 1536}, {2, 3}}, + {L"\U0000ac01\U00000903", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000903", {44033}, {3}}, + {L"\U0000ac01\U00001100", {44033, 4352}, {1, 2}}, + {L"\U0000ac01\U00000308\U00001100", {44033, 4352}, {2, 3}}, + {L"\U0000ac01\U00001160", {44033, 4448}, {1, 2}}, + {L"\U0000ac01\U00000308\U00001160", {44033, 4448}, {2, 3}}, + {L"\U0000ac01\U000011a8", {44033}, {2}}, + {L"\U0000ac01\U00000308\U000011a8", {44033, 4520}, {2, 3}}, + {L"\U0000ac01\U0000ac00", {44033, 44032}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {2, 3}}, + {L"\U0000ac01\U0000ac01", {44033, 44033}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {2, 3}}, + {L"\U0000ac01\U0000231a", {44033, 8986}, {1, 2}}, + {L"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {2, 3}}, + {L"\U0000ac01\U00000300", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000300", {44033}, {3}}, + {L"\U0000ac01\U0000200d", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000200d", {44033}, {3}}, + {L"\U0000ac01\U00000378", {44033, 888}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000378", {44033, 888}, {2, 3}}, + {L"\U0000231a\U00000020", {8986, 32}, {1, 
2}}, + {L"\U0000231a\U00000308\U00000020", {8986, 32}, {2, 3}}, + {L"\U0000231a\U0000000d", {8986, 13}, {1, 2}}, + {L"\U0000231a\U00000308\U0000000d", {8986, 13}, {2, 3}}, + {L"\U0000231a\U0000000a", {8986, 10}, {1, 2}}, + {L"\U0000231a\U00000308\U0000000a", {8986, 10}, {2, 3}}, + {L"\U0000231a\U00000001", {8986, 1}, {1, 2}}, + {L"\U0000231a\U00000308\U00000001", {8986, 1}, {2, 3}}, + {L"\U0000231a\U0000034f", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000034f", {8986}, {3}}, + {L"\U0000231a\U0001f1e6", {8986, 127462}, {1, 2}}, + {L"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {2, 3}}, + {L"\U0000231a\U00000600", {8986, 1536}, {1, 2}}, + {L"\U0000231a\U00000308\U00000600", {8986, 1536}, {2, 3}}, + {L"\U0000231a\U00000903", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000903", {8986}, {3}}, + {L"\U0000231a\U00001100", {8986, 4352}, {1, 2}}, + {L"\U0000231a\U00000308\U00001100", {8986, 4352}, {2, 3}}, + {L"\U0000231a\U00001160", {8986, 4448}, {1, 2}}, + {L"\U0000231a\U00000308\U00001160", {8986, 4448}, {2, 3}}, + {L"\U0000231a\U000011a8", {8986, 4520}, {1, 2}}, + {L"\U0000231a\U00000308\U000011a8", {8986, 4520}, {2, 3}}, + {L"\U0000231a\U0000ac00", {8986, 44032}, {1, 2}}, + {L"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {2, 3}}, + {L"\U0000231a\U0000ac01", {8986, 44033}, {1, 2}}, + {L"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {2, 3}}, + {L"\U0000231a\U0000231a", {8986, 8986}, {1, 2}}, + {L"\U0000231a\U00000308\U0000231a", {8986, 8986}, {2, 3}}, + {L"\U0000231a\U00000300", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000300", {8986}, {3}}, + {L"\U0000231a\U0000200d", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000200d", {8986}, {3}}, + {L"\U0000231a\U00000378", {8986, 888}, {1, 2}}, + {L"\U0000231a\U00000308\U00000378", {8986, 888}, {2, 3}}, + {L"\U00000300\U00000020", {768, 32}, {1, 2}}, + {L"\U00000300\U00000308\U00000020", {768, 32}, {2, 3}}, + {L"\U00000300\U0000000d", {768, 13}, {1, 2}}, + {L"\U00000300\U00000308\U0000000d", {768, 13}, {2, 3}}, + 
{L"\U00000300\U0000000a", {768, 10}, {1, 2}}, + {L"\U00000300\U00000308\U0000000a", {768, 10}, {2, 3}}, + {L"\U00000300\U00000001", {768, 1}, {1, 2}}, + {L"\U00000300\U00000308\U00000001", {768, 1}, {2, 3}}, + {L"\U00000300\U0000034f", {768}, {2}}, + {L"\U00000300\U00000308\U0000034f", {768}, {3}}, + {L"\U00000300\U0001f1e6", {768, 127462}, {1, 2}}, + {L"\U00000300\U00000308\U0001f1e6", {768, 127462}, {2, 3}}, + {L"\U00000300\U00000600", {768, 1536}, {1, 2}}, + {L"\U00000300\U00000308\U00000600", {768, 1536}, {2, 3}}, + {L"\U00000300\U00000903", {768}, {2}}, + {L"\U00000300\U00000308\U00000903", {768}, {3}}, + {L"\U00000300\U00001100", {768, 4352}, {1, 2}}, + {L"\U00000300\U00000308\U00001100", {768, 4352}, {2, 3}}, + {L"\U00000300\U00001160", {768, 4448}, {1, 2}}, + {L"\U00000300\U00000308\U00001160", {768, 4448}, {2, 3}}, + {L"\U00000300\U000011a8", {768, 4520}, {1, 2}}, + {L"\U00000300\U00000308\U000011a8", {768, 4520}, {2, 3}}, + {L"\U00000300\U0000ac00", {768, 44032}, {1, 2}}, + {L"\U00000300\U00000308\U0000ac00", {768, 44032}, {2, 3}}, + {L"\U00000300\U0000ac01", {768, 44033}, {1, 2}}, + {L"\U00000300\U00000308\U0000ac01", {768, 44033}, {2, 3}}, + {L"\U00000300\U0000231a", {768, 8986}, {1, 2}}, + {L"\U00000300\U00000308\U0000231a", {768, 8986}, {2, 3}}, + {L"\U00000300\U00000300", {768}, {2}}, + {L"\U00000300\U00000308\U00000300", {768}, {3}}, + {L"\U00000300\U0000200d", {768}, {2}}, + {L"\U00000300\U00000308\U0000200d", {768}, {3}}, + {L"\U00000300\U00000378", {768, 888}, {1, 2}}, + {L"\U00000300\U00000308\U00000378", {768, 888}, {2, 3}}, + {L"\U0000200d\U00000020", {8205, 32}, {1, 2}}, + {L"\U0000200d\U00000308\U00000020", {8205, 32}, {2, 3}}, + {L"\U0000200d\U0000000d", {8205, 13}, {1, 2}}, + {L"\U0000200d\U00000308\U0000000d", {8205, 13}, {2, 3}}, + {L"\U0000200d\U0000000a", {8205, 10}, {1, 2}}, + {L"\U0000200d\U00000308\U0000000a", {8205, 10}, {2, 3}}, + {L"\U0000200d\U00000001", {8205, 1}, {1, 2}}, + {L"\U0000200d\U00000308\U00000001", {8205, 1}, {2, 
3}}, + {L"\U0000200d\U0000034f", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000034f", {8205}, {3}}, + {L"\U0000200d\U0001f1e6", {8205, 127462}, {1, 2}}, + {L"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {2, 3}}, + {L"\U0000200d\U00000600", {8205, 1536}, {1, 2}}, + {L"\U0000200d\U00000308\U00000600", {8205, 1536}, {2, 3}}, + {L"\U0000200d\U00000903", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000903", {8205}, {3}}, + {L"\U0000200d\U00001100", {8205, 4352}, {1, 2}}, + {L"\U0000200d\U00000308\U00001100", {8205, 4352}, {2, 3}}, + {L"\U0000200d\U00001160", {8205, 4448}, {1, 2}}, + {L"\U0000200d\U00000308\U00001160", {8205, 4448}, {2, 3}}, + {L"\U0000200d\U000011a8", {8205, 4520}, {1, 2}}, + {L"\U0000200d\U00000308\U000011a8", {8205, 4520}, {2, 3}}, + {L"\U0000200d\U0000ac00", {8205, 44032}, {1, 2}}, + {L"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {2, 3}}, + {L"\U0000200d\U0000ac01", {8205, 44033}, {1, 2}}, + {L"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {2, 3}}, + {L"\U0000200d\U0000231a", {8205, 8986}, {1, 2}}, + {L"\U0000200d\U00000308\U0000231a", {8205, 8986}, {2, 3}}, + {L"\U0000200d\U00000300", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000300", {8205}, {3}}, + {L"\U0000200d\U0000200d", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000200d", {8205}, {3}}, + {L"\U0000200d\U00000378", {8205, 888}, {1, 2}}, + {L"\U0000200d\U00000308\U00000378", {8205, 888}, {2, 3}}, + {L"\U00000378\U00000020", {888, 32}, {1, 2}}, + {L"\U00000378\U00000308\U00000020", {888, 32}, {2, 3}}, + {L"\U00000378\U0000000d", {888, 13}, {1, 2}}, + {L"\U00000378\U00000308\U0000000d", {888, 13}, {2, 3}}, + {L"\U00000378\U0000000a", {888, 10}, {1, 2}}, + {L"\U00000378\U00000308\U0000000a", {888, 10}, {2, 3}}, + {L"\U00000378\U00000001", {888, 1}, {1, 2}}, + {L"\U00000378\U00000308\U00000001", {888, 1}, {2, 3}}, + {L"\U00000378\U0000034f", {888}, {2}}, + {L"\U00000378\U00000308\U0000034f", {888}, {3}}, + {L"\U00000378\U0001f1e6", {888, 127462}, {1, 2}}, + {L"\U00000378\U00000308\U0001f1e6", 
{888, 127462}, {2, 3}}, + {L"\U00000378\U00000600", {888, 1536}, {1, 2}}, + {L"\U00000378\U00000308\U00000600", {888, 1536}, {2, 3}}, + {L"\U00000378\U00000903", {888}, {2}}, + {L"\U00000378\U00000308\U00000903", {888}, {3}}, + {L"\U00000378\U00001100", {888, 4352}, {1, 2}}, + {L"\U00000378\U00000308\U00001100", {888, 4352}, {2, 3}}, + {L"\U00000378\U00001160", {888, 4448}, {1, 2}}, + {L"\U00000378\U00000308\U00001160", {888, 4448}, {2, 3}}, + {L"\U00000378\U000011a8", {888, 4520}, {1, 2}}, + {L"\U00000378\U00000308\U000011a8", {888, 4520}, {2, 3}}, + {L"\U00000378\U0000ac00", {888, 44032}, {1, 2}}, + {L"\U00000378\U00000308\U0000ac00", {888, 44032}, {2, 3}}, + {L"\U00000378\U0000ac01", {888, 44033}, {1, 2}}, + {L"\U00000378\U00000308\U0000ac01", {888, 44033}, {2, 3}}, + {L"\U00000378\U0000231a", {888, 8986}, {1, 2}}, + {L"\U00000378\U00000308\U0000231a", {888, 8986}, {2, 3}}, + {L"\U00000378\U00000300", {888}, {2}}, + {L"\U00000378\U00000308\U00000300", {888}, {3}}, + {L"\U00000378\U0000200d", {888}, {2}}, + {L"\U00000378\U00000308\U0000200d", {888}, {3}}, + {L"\U00000378\U00000378", {888, 888}, {1, 2}}, + {L"\U00000378\U00000308\U00000378", {888, 888}, {2, 3}}, + {L"\U0000000d\U0000000a\U00000061\U0000000a\U00000308", {13, 97, 10, 776}, {2, 3, 4, 5}}, + {L"\U00000061\U00000308", {97}, {2}}, + {L"\U00000020\U0000200d\U00000646", {32, 1606}, {2, 3}}, + {L"\U00000646\U0000200d\U00000020", {1606, 32}, {2, 3}}, + {L"\U00001100\U00001100", {4352}, {2}}, + {L"\U0000ac00\U000011a8\U00001100", {44032, 4352}, {2, 3}}, + {L"\U0000ac01\U000011a8\U00001100", {44033, 4352}, {2, 3}}, + {L"\U0001f1e6\U0001f1e7\U0001f1e8\U00000062", {127462, 127464, 98}, {2, 3, 4}}, + {L"\U00000061\U0001f1e6\U0001f1e7\U0001f1e8\U00000062", {97, 127462, 127464, 98}, {1, 3, 4, 5}}, + {L"\U00000061\U0001f1e6\U0001f1e7\U0000200d\U0001f1e8\U00000062", {97, 127462, 127464, 98}, {1, 4, 5, 6}}, + {L"\U00000061\U0001f1e6\U0000200d\U0001f1e7\U0001f1e8\U00000062", {97, 127462, 127463, 98}, {1, 3, 5, 6}}, + 
{L"\U00000061\U0001f1e6\U0001f1e7\U0001f1e8\U0001f1e9\U00000062", {97, 127462, 127464, 98}, {1, 3, 5, 6}}, + {L"\U00000061\U0000200d", {97}, {2}}, + {L"\U00000061\U00000308\U00000062", {97, 98}, {2, 3}}, + {L"\U00000061\U00000903\U00000062", {97, 98}, {2, 3}}, + {L"\U00000061\U00000600\U00000062", {97, 1536}, {1, 3}}, + {L"\U0001f476\U0001f3ff\U0001f476", {128118, 128118}, {2, 3}}, + {L"\U00000061\U0001f3ff\U0001f476", {97, 128118}, {2, 3}}, + {L"\U00000061\U0001f3ff\U0001f476\U0000200d\U0001f6d1", {97, 128118}, {2, 5}}, + {L"\U0001f476\U0001f3ff\U00000308\U0000200d\U0001f476\U0001f3ff", {128118}, {6}}, + {L"\U0001f6d1\U0000200d\U0001f6d1", {128721}, {3}}, + {L"\U00000061\U0000200d\U0001f6d1", {97, 128721}, {2, 3}}, + {L"\U00002701\U0000200d\U00002701", {9985}, {3}}, + {L"\U00000061\U0000200d\U00002701", {97, 9985}, {2, 3}}}}; + +#endif // LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp @@ -0,0 +1,91 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-has-no-incomplete-format +// UNSUPPORTED: gcc-11 + +// + +// Tests the implementation of the extended grapheme cluster boundaries per +// https://www.unicode.org/reports/tr29/tr29-35.html#Grapheme_Cluster_Boundary_Rules +// +// The tests are based on the test data provided by Unicode +// http://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakTest.txt + +#include +#include +#include +#include + +#include "extended_grapheme_cluster.h" + +// Validates whether the number of code points in our "database" matches with +// the number in Unicode. The assumption is that when the number of items per +// property matches, the code points themselves also match. +namespace { +namespace cluster = std::__extended_grapheme_custer_property_boundary; +constexpr int count_entries(cluster::__property property) { + return std::transform_reduce(std::begin(cluster::__entries), std::end(cluster::__entries), 0, std::plus{}, + [property](auto entry) { + if (static_cast(entry & 0xf) != property) + return 0; + + return 1 + static_cast((entry >> 4) & 0x7f); + }); +} + +static_assert(count_entries(cluster::__property::__Prepend) == 22); +static_assert(count_entries(cluster::__property::__CR) == 1); +static_assert(count_entries(cluster::__property::__LF) == 1); +static_assert(count_entries(cluster::__property::__Control) == 3886); +static_assert(count_entries(cluster::__property::__Extend) == 1970); +static_assert(count_entries(cluster::__property::__Regional_Indicator) == 26); +static_assert(count_entries(cluster::__property::__SpacingMark) == 375); +static_assert(count_entries(cluster::__property::__L) == 125); +static_assert(count_entries(cluster::__property::__V) == 95); +static_assert(count_entries(cluster::__property::__T) == 137);
+static_assert(count_entries(cluster::__property::__LV) == 399); +static_assert(count_entries(cluster::__property::__LVT) == 10773); +static_assert(count_entries(cluster::__property::__ZWJ) == 1); +static_assert(count_entries(cluster::__property::__Extended_Pictographic) == 3793); + +} // namespace + +template +constexpr void test(const Data& data) { + for (const auto& d : data) { + assert(d.code_points.size() == d.breaks.size()); + + std::__unicode::__extended_grapheme_cluster_view view{d.input.data(), d.input.data() + d.input.size()}; + for (size_t i = 0; i < d.breaks.size(); ++i) { + auto r = view.__consume(); + assert(r.__code_point_ == d.code_points[i]); + assert(r.__last_ == d.input.data() + d.breaks[i]); + } + } +} + +constexpr bool test() { + test(data_utf8); + +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + if constexpr (sizeof(wchar_t) == 2) + test(data_utf16); + else + test(data_utf32); +#endif + + return true; +} + +int main(int, char**) { + test(); + // static_assert(test()); + + return 0; +} diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp @@ -61,29 +61,17 @@ estimate_column_width_fast(3, CSTR("abc")); estimate_column_width_fast(3, CSTR("a\u007fc")); - if constexpr (sizeof(CharT) == 1) { - // UTF-8 stop at the first multi-byte character. 
- estimate_column_width_fast(0, CSTR("\u0080bc")); - estimate_column_width_fast(1, CSTR("a\u0080c")); - estimate_column_width_fast(2, CSTR("ab\u0080")); - estimate_column_width_fast(1, CSTR("aßc")); - - estimate_column_width_fast(1, CSTR("a\u07ffc")); - estimate_column_width_fast(1, CSTR("a\u0800c")); - - estimate_column_width_fast(1, CSTR("a\u10ffc")); - } else { - // UTF-16/32 stop at the first multi-column character. - estimate_column_width_fast(3, CSTR("\u0080bc")); - estimate_column_width_fast(3, CSTR("a\u0080c")); - estimate_column_width_fast(3, CSTR("ab\u0080")); - estimate_column_width_fast(3, CSTR("aßc")); - - estimate_column_width_fast(3, CSTR("a\u07ffc")); - estimate_column_width_fast(3, CSTR("a\u0800c")); - - estimate_column_width_fast(3, CSTR("a\u10ffc")); - } + // Always stop at the first multi-byte character. + estimate_column_width_fast(0, CSTR("\u0080bc")); + estimate_column_width_fast(1, CSTR("a\u0080c")); + estimate_column_width_fast(2, CSTR("ab\u0080")); + estimate_column_width_fast(1, CSTR("aßc")); + + estimate_column_width_fast(1, CSTR("a\u07ffc")); + estimate_column_width_fast(1, CSTR("a\u0800c")); + + estimate_column_width_fast(1, CSTR("a\u10ffc")); + // First 2-column character estimate_column_width_fast(1, CSTR("a\u1100c")); @@ -228,12 +216,7 @@ get_string_alignment(2, 2, false, CSTR("a\xe0"), 0, 3); get_string_alignment(2, 2, false, CSTR("a\xf0"), 0, 3); } else if constexpr (sizeof(CharT) == 2) { - // Corrupt UTF-16 sequence. - if constexpr (std::same_as) - get_string_alignment(2, 2, false, u"a\xdddd", 0, 3); - else - // Corrupt UTF-16 wchar_t seqence. - get_string_alignment(2, 2, false, L"a\xdddd", 0, 3); + get_string_alignment(2, 2, false, L"a\xdddd", 0, 3); } // UTF-32 doesn't combine characters, thus no corruption tests. 
} @@ -250,13 +233,7 @@ #ifndef TEST_HAS_NO_WIDE_CHARACTERS test(); #endif -#ifndef _LIBCPP_HAS_NO_CHAR8_T - test(); -#endif -#ifndef TEST_HAS_NO_UNICODE_CHARS - test(); - test(); -#endif + return true; } diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h --- a/libcxx/test/std/utilities/format/format.functions/format_tests.h +++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h @@ -362,6 +362,24 @@ check.template operator()<"{:-<7}">(SV("a\u1110c---"), STR("a\u1110c")); check.template operator()<"{:-^7}">(SV("-a\u1110c--"), STR("a\u1110c")); check.template operator()<"{:->7}">(SV("---a\u1110c"), STR("a\u1110c")); + + // Examples used in P1868R2 + check.template operator()<"{:*^3}">(SV("*\u0041*"), STR("\u0041")); // { LATIN CAPITAL LETTER A } + check.template operator()<"{:*^3}">(SV("*\u00c1*"), STR("\u00c1")); // { LATIN CAPITAL LETTER A WITH ACUTE } + check.template operator()<"{:*^3}">(SV("*\u0041\u0301*"), + STR("\u0041\u0301")); // { LATIN CAPITAL LETTER A } { COMBINING ACUTE ACCENT } + check.template operator()<"{:*^3}">(SV("*\u0132*"), STR("\u0132")); // { LATIN CAPITAL LIGATURE IJ } + check.template operator()<"{:*^3}">(SV("*\u0394*"), STR("\u0394")); // { GREEK CAPITAL LETTER DELTA } + + check.template operator()<"{:*^3}">(SV("*\u0429*"), STR("\u0429")); // { CYRILLIC CAPITAL LETTER SHCHA } + check.template operator()<"{:*^3}">(SV("*\u05d0*"), STR("\u05d0")); // { HEBREW LETTER ALEF } + check.template operator()<"{:*^3}">(SV("*\u0634*"), STR("\u0634")); // { ARABIC LETTER SHEEN } + check.template operator()<"{:*^4}">(SV("*\u3009*"), STR("\u3009")); // { RIGHT-POINTING ANGLE BRACKET } + check.template operator()<"{:*^4}">(SV("*\u754c*"), STR("\u754c")); // { CJK Unified Ideograph-754C } + check.template operator()<"{:*^4}">(SV("*\U0001f921*"), STR("\U0001f921")); // { UNICORN FACE } + check.template operator()<"{:*^4}">( + 
SV("*\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466*"), + STR("\U0001f468\u200d\U0001F469\u200d\U0001F467\u200d\U0001F466")); // { Family: Man, Woman, Girl, Boy } #endif // TEST_HAS_NO_UNICODE } @@ -2545,7 +2563,6 @@ // The `char` to `wchar_t` formatting is tested separately. check.template operator()<"hello {}{}{}{}{}{}{}">(SV("hello 09azAZ!"), CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!')); - format_test_char(check, check_exception); format_test_char_as_integer(check, check_exception); diff --git a/libcxx/utils/generate_extended_grapheme_cluster_table.py b/libcxx/utils/generate_extended_grapheme_cluster_table.py new file mode 100755 --- /dev/null +++ b/libcxx/utils/generate_extended_grapheme_cluster_table.py @@ -0,0 +1,324 @@ +#!/usr/bin/env python +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +# The code is based on +# https://github.com/microsoft/STL/blob/main/tools/unicode_properties_parse/grapheme_break_property_data_gen.py +# +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from io import StringIO +from pathlib import Path +from dataclasses import dataclass, field +from typing import Optional +import re + + +@dataclass +class PropertyRange: + lower: int = -1 + upper: int = -1 + prop: str = None + + +@dataclass +class Entry: + lower: int = -1 + offset: int = -1 + prop: int = -1 + + +LINE_REGEX = re.compile( + r"^(?P[0-9A-F]{4,5})(?:\.\.(?P[0-9A-F]{4,5}))?\s*;\s*(?P\w+)" +) + + +def parsePropertyLine(inputLine: str) -> Optional[PropertyRange]: + result = PropertyRange() + if m := LINE_REGEX.match(inputLine): + lower_str, upper_str, result.prop = m.group("lower", "upper", "prop") + result.lower = int(lower_str, base=16) + result.upper = result.lower + if upper_str is not None: + result.upper = int(upper_str, base=16) + return result + + else: + return None + + +def compactPropertyRange(input: list[PropertyRange]) -> list[PropertyRange]: + """ + Merges consecutive ranges with the same property to one range. + + Merging the ranges results in fewer ranges in the output table, reducing the + size of the binary and improving the performance of the lookup. + """ + result = list() + for x in input: + if ( + len(result) + and result[-1].prop == x.prop + and result[-1].upper + 1 == x.lower + ): + result[-1].upper = x.upper + continue + result.append(x) + return result + + +PROP_VALUE_ENUMERATOR_TEMPLATE = "__{}" +PROP_VALUE_ENUM_TEMPLATE = """ +enum class __property : uint8_t {{ + // Values generated from data files. + {enumerators}, + + // The properies below aren't stored in the "database". + + // Text position properties. + __sot, + __eot, + + // The code unit has none of above properties. + __none +}}; +""" + +DATA_ARRAY_TEMPLATE = """ +/// The entry of the extended grapheme cluster bondary property tqble. +/// +/// The original MSVC STL code stores the data in two /// parallel arrays: +/// - One uint32_t with the lower bounds for the code points.
+/// - One uint16_t with the size and the property. +/// This requires 6 bytes per entry. +/// +/// In libc++ this is stored in an array with 4 bytes per entry: +/// - An unicode code point is restricted to 21-bit values. +/// - There are 14 properties requiring 4 bits. +/// - This leaves 7 bits to encode the range of an entry. +/// +/// MSVC STL has 12 bits for the range (4096 values) and libc++ 7 bits +/// (128 values). For most ranges 7 bits is sufficient. When a range has more +/// than 128 entries the range is split in multiple entries. +/// +/// Based on Unicode 12 the difference is +/// MSVC STL 1602 * 6 = 9612 bytes +/// libc++ 1643 * 4 = 6572 bytes +/// ========== - +/// saving 3040 bytes +/// +/// The measured overheaded of the additional masking is about 3% in +/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp +struct __entry {{ + /// Code point of the lower bound of the range. + uint32_t __lower_bound : 21; + /// Offset to the upper bound of the range, 0 means a range of 1 code unite. + uint32_t __offset : 7; + /// The propery of the range. + uint32_t __property : 4; +}}; + +static_assert(sizeof(__entry) == sizeof(uint32_t)); + +/// The table with the extended grapheme cluster bondary properties. +/// +/// The contents are generated by a script. +inline constexpr uint32_t __entries[{size}] = {{{entries}}}; + +/// Returns the extended grapheme cluster bondary property of a code point. 
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept {{ + // TODO FMT use ranges + ptrdiff_t __i = std::upper_bound(__entries, std::end(__entries), (__code_point << 11) | 0x7ffu) - __entries; + if (__i == 0) + return __property::__none; + + --__i; + uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 4) & 0x7f); + if (__code_point <= __upper_bound) + return static_cast<__property>(__entries[__i] & 0xf); + + return __property::__none; +}} +""" + +MSVC_FORMAT_UCD_TABLES_HPP_TEMPLATE = """ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utiles/generate_extended_grapheme_cluster_table.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use +// for definitions of Unicode Inc.'s Data Files and Software. +// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. 
+// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of the Unicode data files and any associated documentation +// (the "Data Files") or Unicode software and any associated documentation +// (the "Software") to deal in the Data Files or Software +// without restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, and/or sell copies of +// the Data Files or Software, and to permit persons to whom the Data Files +// or Software are furnished to do so, provided that either +// (a) this copyright and permission notice appear with all copies +// of the Data Files or Software, or +// (b) this copyright and permission notice appear in associated +// Documentation. +// +// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT OF THIRD PARTY RIGHTS. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THE DATA FILES OR SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, +// use or other dealings in these Data Files or Software without prior +// written authorization of the copyright holder. 
+ +#ifndef _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H +#define _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H + +#include <__algorithm/upper_bound.h> +#include <__config> +#include <__iterator/access.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 + +namespace __extended_grapheme_custer_property_boundary {{ + +{content} + +}} // __extended_grapheme_custer_property_boundary + +#endif //_LIBCPP_STD_VER > 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_EXTENDED_GRAPHEME_CLUSTER_TABLE_H +""" + + +def property_ranges_to_table( + ranges: list[PropertyRange], props: list[str] +) -> list[Entry]: + assert len(props) < 16 + result = list[Entry]() + high = -1 + for range in sorted(ranges, key=lambda x: x.lower): + # Validate overlapping ranges + assert range.lower > high + high = range.upper + + while True: + e = Entry(range.lower, range.upper - range.lower, props.index(range.prop)) + if e.offset <= 127: + result.append(e) + break + e.offset = 127 + result.append(e) + range.lower += 128 + return result + + +cpp_entrytemplate = "0x{:08x}" + + +def generate_cpp_data(prop_name: str, ranges: list[PropertyRange]) -> str: + result = StringIO() + prop_values = sorted(set(x.prop for x in ranges)) + table = property_ranges_to_table(ranges, prop_values) + enumerator_values = [PROP_VALUE_ENUMERATOR_TEMPLATE.format(x) for x in prop_values] + result.write( + PROP_VALUE_ENUM_TEMPLATE.format(enumerators=",".join(enumerator_values)) + ) + result.write( + DATA_ARRAY_TEMPLATE.format( + prop_name=prop_name, + size=len(table), + entries=",".join( + [cpp_entrytemplate.format(x.lower << 11 | x.offset << 4 | x.prop) for x in table] + ), + ) + ) + + return result.getvalue() + + +def generate_data_tables() -> str: + """ + Generate Unicode data for inclusion into from + GraphemeBreakProperty.txt and emoji-data.txt. 
+ + GraphemeBreakProperty.txt can be found at + https://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakProperty.txt + + emoji-data.txt can be found at + https://www.unicode.org/Public/12.0.0/ucd/emoji/emoji-data.txt + + Both files are expected to be in the same directory as this script. + """ + gbp_data_path = Path(__file__).absolute().with_name("GraphemeBreakProperty.txt") + emoji_data_path = Path(__file__).absolute().with_name("emoji-data.txt") + gbp_ranges = list() + emoji_ranges = list() + with gbp_data_path.open(encoding="utf-8") as f: + gbp_ranges = compactPropertyRange( + [x for line in f if (x := parsePropertyLine(line))] + ) + with emoji_data_path.open(encoding="utf-8") as f: + emoji_ranges = compactPropertyRange( + [x for line in f if (x := parsePropertyLine(line))] + ) + + [gbp_ranges.append(x) for x in emoji_ranges if x.prop == "Extended_Pictographic"] + gpb_cpp_data = generate_cpp_data("Grapheme_Break", gbp_ranges) + return "\n".join([gpb_cpp_data]) + + +if __name__ == "__main__": + print( + MSVC_FORMAT_UCD_TABLES_HPP_TEMPLATE.lstrip().format( + content=generate_data_tables() + ) + ) diff --git a/libcxx/utils/generate_extended_grapheme_cluster_test.py b/libcxx/utils/generate_extended_grapheme_cluster_test.py new file mode 100755 --- /dev/null +++ b/libcxx/utils/generate_extended_grapheme_cluster_test.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +# The code is based on +# https://github.com/microsoft/STL/blob/main/tools/unicode_properties_parse/grapheme_break_test_data_gen.py +# +# Copyright (c) Microsoft Corporation. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

from pathlib import Path
from dataclasses import dataclass, field
from typing import Optional, TextIO
from array import array


@dataclass
class BreakTestItem:
    """The parsed contents of one line of GraphemeBreakTest.txt."""

    # First code point of every extended grapheme cluster on the line.
    code_points: list[int] = field(default_factory=list)
    # All code points of the line as C++ "\UXXXXXXXX" escape sequences.
    encoded: str = ""
    # Running number of code units consumed at the end of every cluster, for
    # the UTF-8, UTF-16 and UTF-32 encoding of the line respectively.
    breaks_utf8: list[int] = field(default_factory=list)
    breaks_utf16: list[int] = field(default_factory=list)
    breaks_utf32: list[int] = field(default_factory=list)


# Marker class; not referenced by the code in this script.
class CommentLine:
    pass


# Marker class; not referenced by the code in this script.
class EOF:
    pass


def _read_separator(input: TextIO) -> None:
    """
    Consumes the single whitespace character following a break marker.

    The read is deliberately done outside the ``assert``: an expression inside
    an ``assert`` is not evaluated when Python runs with ``-O``, which would
    leave the separator in the stream and desynchronise the parser.
    """
    separator = input.read(1)
    assert separator.isspace()


def parseBreakTestLine(input: TextIO) -> Optional[BreakTestItem]:
    """
    Parses one line of GraphemeBreakTest.txt from ``input``.

    A line consists of hexadecimal code points separated by DIVISION SIGN
    (a cluster break) or MULTIPLICATION SIGN (no break), optionally followed
    by a ``#`` comment.

    Returns the parsed item once a ``#`` comment or a newline is reached. A
    line holding only a comment yields an item whose ``encoded`` is empty.
    Returns ``None`` when the end of the input is reached.

    Raises ``AssertionError`` on a character that does not fit the format.
    """
    result = BreakTestItem()
    code_point = -1  # First code point of the current cluster, -1 when unset.
    utf8 = 0
    utf16 = 0
    utf32 = 0

    while True:
        c = input.read(1)
        if c == "\N{DIVISION SIGN}":
            # The line starts with a division sign, don't add it to the output.
            if code_point != -1:
                result.code_points.append(code_point)
                code_point = -1
                result.breaks_utf8.append(utf8)
                result.breaks_utf16.append(utf16)
                result.breaks_utf32.append(utf32)

            _read_separator(input)
            continue
        if c == "\N{MULTIPLICATION SIGN}":
            _read_separator(input)
            continue
        if c.isalnum():
            # Collect the remaining hexadecimal digits of the code point.
            while ch := input.read(1):
                if ch.isalnum():
                    c += ch
                else:
                    assert ch.isspace()
                    break
            i = int(c, base=16)
            if code_point == -1:
                code_point = i

            result.encoded += f"\\U{i:08x}"
            c = chr(i)
            utf8 += len(c.encode())
            # Since we only care about the number of code units the byte order
            # doesn't matter. The byte order is specified to avoid the BOM.
            utf16 += len(c.encode("utf-16-le")) // 2
            utf32 += len(c.encode("utf-32-le")) // 4
            continue
        if c == "#":
            input.readline()
            return result
        if c == "\n":
            return result
        if c == "":
            return None
        # Raised explicitly so that the check survives ``python -O``; a
        # stripped ``assert False`` would make this loop spin on bad input.
        raise AssertionError(f"unexpected character {c!r} in break test data")


# NOTE(review): the copy of this script under review had lost every
# angle-bracketed span of the template below (the #include targets, the
# template parameter list, the element types and the Terms of Use URL). They
# are restored here from how the generated data is consumed by the test
# (narrow and L"" literals, code point values, offsets into the input) --
# confirm against the checked-in generated header.
cpp_template = """// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// WARNING, this entire header is generated by
// utils/generate_extended_grapheme_cluster_test.py
// DO NOT MODIFY!

// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
//
// See Terms of Use <https://www.unicode.org/copyright.html>
// for definitions of Unicode Inc.'s Data Files and Software.
//
// NOTICE TO USER: Carefully read the following legal agreement.
// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
// TERMS AND CONDITIONS OF THIS AGREEMENT.
// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
// THE DATA FILES OR SOFTWARE.
//
// COPYRIGHT AND PERMISSION NOTICE
//
// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of the Unicode data files and any associated documentation
// (the "Data Files") or Unicode software and any associated documentation
// (the "Software") to deal in the Data Files or Software
// without restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, and/or sell copies of
// the Data Files or Software, and to permit persons to whom the Data Files
// or Software are furnished to do so, provided that either
// (a) this copyright and permission notice appear with all copies
// of the Data Files or Software, or
// (b) this copyright and permission notice appear in associated
// Documentation.
//
// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
//
// Except as contained in this notice, the name of a copyright holder
// shall not be used in advertising or otherwise to promote the sale,
// use or other dealings in these Data Files or Software without prior
// written authorization of the copyright holder.

#ifndef LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H
#define LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H

#include <array>
#include <string_view>
#include <vector>

template <class CharT>
struct data {{
  /// The input to parse.
  std::basic_string_view<CharT> input;

  /// The first code point all extended grapheme clusters in the input.
  std::vector<char32_t> code_points;

  /// The offset of the last code units of the extended grapheme clusters in the input.
  ///
  /// The vector has the same number of entries as \\ref code_points.
  std::vector<size_t> breaks;
}};

/// The data for UTF-8.
std::array<data<char>, {0}> data_utf8 = {{{{ {1} }}}};

/// The data for UTF-16.
///
/// Note that most of the data for the UTF-16 and UTF-32 are identical. However
/// since the size of the code units differ the breaks can contain different
/// values.
std::array<data<wchar_t>, {0}> data_utf16 = {{{{ {2} }}}};

/// The data for UTF-32.
///
/// Note that most of the data for the UTF-16 and UTF-32 are identical. However
/// since the size of the code units differ the breaks can contain different
/// values.
std::array<data<wchar_t>, {0}> data_utf32 = {{{{ {3} }}}};

#endif // LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H
"""

cpp_test_data_line_template = "{{ {}, {{ {} }}, {{ {} }} }}"


def _format_data_line(literal_prefix: str, line: BreakTestItem, breaks: list[int]) -> str:
    """
    Formats one ``data`` initializer.

    ``literal_prefix`` is the C++ string literal prefix (empty for a narrow
    literal, ``L`` for a wide literal) and ``breaks`` the break offsets that
    belong to the encoding of that literal.
    """
    return cpp_test_data_line_template.format(
        f'{literal_prefix}"{line.encoded}"',
        ",".join(str(x) for x in line.code_points),
        ",".join(str(x) for x in breaks),
    )


def lineToCppDataLineUtf8(line: BreakTestItem) -> str:
    """Returns the initializer of ``line`` for the UTF-8 (narrow) data."""
    return _format_data_line("", line, line.breaks_utf8)


def lineToCppDataLineUtf16(line: BreakTestItem) -> str:
    """Returns the initializer of ``line`` for the UTF-16 (wide) data."""
    return _format_data_line("L", line, line.breaks_utf16)


def lineToCppDataLineUtf32(line: BreakTestItem) -> str:
    """Returns the initializer of ``line`` for the UTF-32 (wide) data."""
    return _format_data_line("L", line, line.breaks_utf32)


def generate_all() -> str:
    """
    Generate test data from "GraphemeBreakTest.txt"
    This file can be downloaded from: https://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakTest.txt
    This script looks for GraphemeBreakTest.txt in same directory as this script

    Returns the complete contents of the generated C++ header.
    """
    test_data_path = Path(__file__).absolute().with_name("GraphemeBreakTest.txt")
    lines = list()
    with open(test_data_path, mode="rt", encoding="utf-8") as file:
        # ``None`` marks the end of the file; comment-only lines are parsed
        # to an item without encoded data and are skipped.
        while (line := parseBreakTestLine(file)) is not None:
            if len(line.encoded) > 0:
                lines.append(line)
    return cpp_template.format(
        len(lines),
        ",".join(map(lineToCppDataLineUtf8, lines)),
        ",".join(map(lineToCppDataLineUtf16, lines)),
        ",".join(map(lineToCppDataLineUtf32, lines)),
    )


if __name__ == "__main__":
    print(generate_all())