diff --git a/libc/include/uchar.h b/libc/include/uchar.h
new file mode 100644
--- /dev/null
+++ b/libc/include/uchar.h
@@ -0,0 +1,64 @@
+//===---------------- C standard library header uchar.h -----------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UCHAR_H
+#define LLVM_LIBC_UCHAR_H
+
+#include <__llvm-libc-common.h>
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* uint_least16_t, uint_least32_t */
+
+#ifndef __STDC_UTF_16__
+#define __STDC_UTF_16__ 1
+#endif
+
+#ifndef __STDC_UTF_32__
+#define __STDC_UTF_32__ 1
+#endif
+
+/*
+ * char16_t and char32_t are keywords in C++, so the typedefs below are only
+ * provided for C11 and later. The compiler-owned __CHAR16_TYPE__ and
+ * __CHAR32_TYPE__ macros are deliberately left alone.
+ */
+#if !defined(__cplusplus) && defined(__STDC_UTF_16__) && (__STDC_VERSION__ >= 201112L)
+/* ISO9899:2011 7.28: Define char16_t if __STDC_UTF_16__ is set. */
+typedef uint_least16_t char16_t;
+#endif /* __STDC_UTF_16__ */
+
+#if !defined(__cplusplus) && defined(__STDC_UTF_32__) && (__STDC_VERSION__ >= 201112L)
+/* ISO9899:2011 7.28: Define char32_t if __STDC_UTF_32__ is set. */
+typedef uint_least32_t char32_t;
+#endif /* __STDC_UTF_32__ */
+
+__BEGIN_C_DECLS
+
+/* Conversion state object. A plain int: no shift-state encodings (e.g. Shift JIS) are supported. */
+typedef int mbstate_t;
+
+/*
+ * The pointer parameters use __restrict rather than restrict because this
+ * header is also included from C++ sources, where restrict is not a keyword.
+ */
+
+/* ISO9899:2011 7.28.1.1: multibyte character to UTF-16 code unit. */
+size_t mbrtoc16(char16_t *__restrict pc16, const char *__restrict s, size_t n, mbstate_t *__restrict ps);
+
+/* ISO9899:2011 7.28.1.2: UTF-16 code unit to multibyte character. */
+size_t c16rtomb(char *__restrict s, char16_t c16, mbstate_t *__restrict ps);
+
+/* ISO9899:2011 7.28.1.3: multibyte character to UTF-32 code unit. */
+size_t mbrtoc32(char32_t *__restrict pc32, const char *__restrict s, size_t n, mbstate_t *__restrict ps);
+
+/* ISO9899:2011 7.28.1.4: UTF-32 code unit to multibyte character. */
+size_t c32rtomb(char *__restrict s, char32_t c32, mbstate_t *__restrict ps);
+
+__END_C_DECLS
+
+#endif // LLVM_LIBC_UCHAR_H
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_subdirectory(errno)
 add_subdirectory(math)
 add_subdirectory(string)
+add_subdirectory(uchar)
 
 # TODO: Add this target conditional to the target OS.
 add_subdirectory(sys)
diff --git a/libc/src/uchar/CMakeLists.txt b/libc/src/uchar/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/CMakeLists.txt
@@ -0,0 +1,35 @@
+add_entrypoint_object(
+  mbrtoc16
+  SRCS
+    mbrtoc16.cpp
+  HDRS
+    mbrtoc16.h
+  #DEPENDS
+)
+
+add_entrypoint_object(
+  c16rtomb
+  SRCS
+    c16rtomb.cpp
+  HDRS
+    c16rtomb.h
+  #DEPENDS
+)
+
+add_entrypoint_object(
+  mbrtoc32
+  SRCS
+    mbrtoc32.cpp
+  HDRS
+    mbrtoc32.h
+  #DEPENDS
+)
+
+add_entrypoint_object(
+  c32rtomb
+  SRCS
+    c32rtomb.cpp
+  HDRS
+    c32rtomb.h
+  #DEPENDS
+)
diff --git a/libc/src/uchar/c16rtomb.h b/libc/src/uchar/c16rtomb.h
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/c16rtomb.h
@@ -0,0 +1,20 @@
+//===----------------- Implementation header for c16rtomb -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UCHAR_C16RTOMB_H
+#define LLVM_LIBC_SRC_UCHAR_C16RTOMB_H
+
+#include "include/uchar.h"
+
+namespace __llvm_libc {
+// ISO C11 7.28.1.2; __restrict because restrict is not a C++ keyword.
+size_t c16rtomb(char *__restrict s, char16_t c16, mbstate_t *__restrict ps);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_UCHAR_C16RTOMB_H
diff --git a/libc/src/uchar/c16rtomb.cpp b/libc/src/uchar/c16rtomb.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/c16rtomb.cpp
@@ -0,0 +1,56 @@
+//===-------------------- Implementation of c16rtomb -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/uchar/c16rtomb.h"
+
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Converts one UTF-16 code unit to its UTF-8 encoding.
+//
+//   s   - output buffer supplied by the caller; it must have room for at
+//         least 3 bytes. Nothing is written when s is null.
+//   c16 - the UTF-16 code unit to convert.
+//   ps  - conversion state; ignored, because surrogate pairs are not combined.
+//
+// Returns the number of bytes stored in s (1 to 3). When s is null the call
+// behaves as if U+0000 had been encoded into an internal buffer and returns 1.
+//
+// Limitation: a lone surrogate (0xD800-0xDFFF) cannot be encoded without its
+// partner, so it is replaced by U+FFFD REPLACEMENT CHARACTER.
+size_t LLVM_LIBC_ENTRYPOINT(c16rtomb)(char *__restrict s, char16_t c16,
+                                      mbstate_t *__restrict ps) {
+  (void)ps;
+
+  if (s == nullptr)
+    return 1;
+
+  if (c16 >= 0xD800 && c16 <= 0xDFFF)
+    c16 = 0xFFFD; // Replacement character.
+
+  if (c16 <= 0x7F) {
+    s[0] = static_cast<char>(c16);
+    return 1;
+  }
+
+  if (c16 <= 0x7FF) {
+    // 110xxxxx 10xxxxxx
+    s[0] = static_cast<char>(0xC0 | (c16 >> 6));
+    s[1] = static_cast<char>(0x80 | (c16 & 0x3F));
+    return 2;
+  }
+
+  // 1110xxxx 10xxxxxx 10xxxxxx
+  s[0] = static_cast<char>(0xE0 | (c16 >> 12));
+  s[1] = static_cast<char>(0x80 | ((c16 >> 6) & 0x3F));
+  s[2] = static_cast<char>(0x80 | (c16 & 0x3F));
+  return 3;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/uchar/c32rtomb.h b/libc/src/uchar/c32rtomb.h
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/c32rtomb.h
@@ -0,0 +1,20 @@
+//===----------------- Implementation header for c32rtomb -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UCHAR_C32RTOMB_H
+#define LLVM_LIBC_SRC_UCHAR_C32RTOMB_H
+
+#include "include/uchar.h"
+
+namespace __llvm_libc {
+// ISO C11 7.28.1.4; __restrict because restrict is not a C++ keyword.
+size_t c32rtomb(char *__restrict s, char32_t c32, mbstate_t *__restrict ps);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_UCHAR_C32RTOMB_H
diff --git a/libc/src/uchar/c32rtomb.cpp b/libc/src/uchar/c32rtomb.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/c32rtomb.cpp
@@ -0,0 +1,65 @@
+//===-------------------- Implementation of c32rtomb -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/uchar/c32rtomb.h"
+
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Converts one UTF-32 code unit (a Unicode code point) to UTF-8.
+//
+//   s   - output buffer supplied by the caller; it must have room for at
+//         least 4 bytes. Nothing is written when s is null.
+//   c32 - the code point to convert.
+//   ps  - conversion state; ignored, because UTF-8 has no shift states.
+//
+// Returns the number of bytes stored in s (1 to 4). When s is null the call
+// behaves as if U+0000 had been encoded into an internal buffer and returns 1.
+// Returns (size_t)-1 when c32 is a surrogate or lies above U+10FFFF, since
+// neither has a valid UTF-8 encoding.
+// TODO: also set errno to EILSEQ in that case, as ISO C11 7.28.1.4 requires.
+size_t LLVM_LIBC_ENTRYPOINT(c32rtomb)(char *__restrict s, char32_t c32,
+                                      mbstate_t *__restrict ps) {
+  (void)ps;
+
+  if (s == nullptr)
+    return 1;
+
+  if (c32 > 0x10FFFF || (c32 >= 0xD800 && c32 <= 0xDFFF))
+    return static_cast<size_t>(-1);
+
+  if (c32 <= 0x7F) {
+    s[0] = static_cast<char>(c32);
+    return 1;
+  }
+
+  if (c32 <= 0x7FF) {
+    // 110xxxxx 10xxxxxx
+    s[0] = static_cast<char>(0xC0 | (c32 >> 6));
+    s[1] = static_cast<char>(0x80 | (c32 & 0x3F));
+    return 2;
+  }
+
+  if (c32 <= 0xFFFF) {
+    // 1110xxxx 10xxxxxx 10xxxxxx
+    s[0] = static_cast<char>(0xE0 | (c32 >> 12));
+    s[1] = static_cast<char>(0x80 | ((c32 >> 6) & 0x3F));
+    s[2] = static_cast<char>(0x80 | (c32 & 0x3F));
+    return 3;
+  }
+
+  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+  s[0] = static_cast<char>(0xF0 | (c32 >> 18));
+  s[1] = static_cast<char>(0x80 | ((c32 >> 12) & 0x3F));
+  s[2] = static_cast<char>(0x80 | ((c32 >> 6) & 0x3F));
+  s[3] = static_cast<char>(0x80 | (c32 & 0x3F));
+  return 4;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/uchar/mbrtoc16.h b/libc/src/uchar/mbrtoc16.h
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/mbrtoc16.h
@@ -0,0 +1,20 @@
+//===----------------- Implementation header for mbrtoc16 -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UCHAR_MBRTOC16_H
+#define LLVM_LIBC_SRC_UCHAR_MBRTOC16_H
+
+#include "include/uchar.h"
+
+namespace __llvm_libc {
+// ISO C11 7.28.1.1; __restrict because restrict is not a C++ keyword.
+size_t mbrtoc16(char16_t *__restrict pc16, const char *__restrict s, size_t n, mbstate_t *__restrict ps);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_UCHAR_MBRTOC16_H
diff --git a/libc/src/uchar/mbrtoc16.cpp b/libc/src/uchar/mbrtoc16.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/mbrtoc16.cpp
@@ -0,0 +1,126 @@
+//===-------------------- Implementation of mbrtoc16 -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/uchar/mbrtoc16.h"
+
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Decodes one UTF-8 sequence from the first n bytes of s into cp.
+// Returns the sequence length (1 to 4) on success, (size_t)-2 if the input
+// ends before the sequence is complete, or (size_t)-1 if the bytes are not
+// well-formed UTF-8 (bad lead or continuation byte, overlong form, surrogate,
+// or a value above U+10FFFF). cp is only written on success.
+// NOTE: mbrtoc32.cpp carries an identical copy; keep the two in sync.
+static size_t decode_utf8(const char *s, size_t n, char32_t &cp) {
+  if (n == 0)
+    return static_cast<size_t>(-2);
+
+  const unsigned char lead = static_cast<unsigned char>(s[0]);
+  if (lead < 0x80) {
+    cp = lead;
+    return 1;
+  }
+
+  size_t length;
+  char32_t smallest; // Smallest code point that needs `length` bytes.
+  char32_t value;    // Payload bits gathered so far.
+  if ((lead & 0xE0) == 0xC0) {
+    length = 2;
+    smallest = 0x80;
+    value = lead & 0x1F;
+  } else if ((lead & 0xF0) == 0xE0) {
+    length = 3;
+    smallest = 0x800;
+    value = lead & 0x0F;
+  } else if ((lead & 0xF8) == 0xF0) {
+    length = 4;
+    smallest = 0x10000;
+    value = lead & 0x07;
+  } else {
+    return static_cast<size_t>(-1); // Stray continuation byte or 0xF8-0xFF.
+  }
+
+  for (size_t i = 1; i < length; ++i) {
+    if (i == n)
+      return static_cast<size_t>(-2);
+    const unsigned char byte = static_cast<unsigned char>(s[i]);
+    if ((byte & 0xC0) != 0x80)
+      return static_cast<size_t>(-1);
+    value = (value << 6) | (byte & 0x3F);
+  }
+
+  if (value < smallest || value > 0x10FFFF ||
+      (value >= 0xD800 && value <= 0xDFFF))
+    return static_cast<size_t>(-1);
+
+  cp = value;
+  return length;
+}
+
+// Converts the next UTF-8 sequence in s to UTF-16, one code unit per call.
+//
+//   pc16 - receives the code unit; may be null to discard it.
+//   s    - UTF-8 input; a null s is treated as the empty string "".
+//   n    - number of bytes of s that may be examined.
+//   ps   - conversion state; holds the low surrogate between the two calls
+//          needed for a code point above U+FFFF, so it must start out as 0.
+//          A null ps selects a state object private to this function.
+//
+// Returns
+//   0           if the decoded code point is U+0000,
+//   1 to 4      the number of bytes consumed from s,
+//   (size_t)-3  if the pending low surrogate was stored; no input consumed,
+//   (size_t)-2  if the first n bytes are an incomplete sequence; nothing is
+//               consumed or remembered, so call again with more input,
+//   (size_t)-1  if the input is not well-formed UTF-8.
+// TODO: set errno to EILSEQ on (size_t)-1, as ISO C11 7.28.1.1 requires.
+size_t LLVM_LIBC_ENTRYPOINT(mbrtoc16)(char16_t *__restrict pc16,
+                                      const char *__restrict s, size_t n,
+                                      mbstate_t *__restrict ps) {
+  // Relies on mbstate_t being the plain int that include/uchar.h declares.
+  static mbstate_t internal_state = 0;
+  mbstate_t *state = ps != nullptr ? ps : &internal_state;
+
+  if (s == nullptr) {
+    // Equivalent to mbrtoc16(NULL, "", 1, ps).
+    pc16 = nullptr;
+    s = "";
+    n = 1;
+  }
+
+  // Hand out the low surrogate left over from the previous call first.
+  if (*state != 0) {
+    if (pc16 != nullptr)
+      *pc16 = static_cast<char16_t>(*state);
+    *state = 0;
+    return static_cast<size_t>(-3);
+  }
+
+  char32_t cp = 0;
+  const size_t consumed = decode_utf8(s, n, cp);
+  if (consumed == static_cast<size_t>(-1) ||
+      consumed == static_cast<size_t>(-2))
+    return consumed;
+
+  if (cp <= 0xFFFF) {
+    if (pc16 != nullptr)
+      *pc16 = static_cast<char16_t>(cp);
+    return cp == 0 ? 0 : consumed;
+  }
+
+  // Split into a surrogate pair; the low half is delivered by the next call.
+  const char32_t offset = cp - 0x10000;
+  if (pc16 != nullptr)
+    *pc16 = static_cast<char16_t>(0xD800 + (offset >> 10));
+  *state = static_cast<mbstate_t>(0xDC00 + (offset & 0x3FF));
+  return consumed;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/uchar/mbrtoc32.h b/libc/src/uchar/mbrtoc32.h
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/mbrtoc32.h
@@ -0,0 +1,20 @@
+//===----------------- Implementation header for mbrtoc32 -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_UCHAR_MBRTOC32_H
+#define LLVM_LIBC_SRC_UCHAR_MBRTOC32_H
+
+#include "include/uchar.h"
+
+namespace __llvm_libc {
+// ISO C11 7.28.1.3; __restrict because restrict is not a C++ keyword.
+size_t mbrtoc32(char32_t *__restrict pc32, const char *__restrict s, size_t n, mbstate_t *__restrict ps);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_UCHAR_MBRTOC32_H
diff --git a/libc/src/uchar/mbrtoc32.cpp b/libc/src/uchar/mbrtoc32.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/uchar/mbrtoc32.cpp
@@ -0,0 +1,101 @@
+//===-------------------- Implementation of mbrtoc32 -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/uchar/mbrtoc32.h"
+
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// Decodes one UTF-8 sequence from the first n bytes of s into cp.
+// Returns the sequence length (1 to 4) on success, (size_t)-2 if the input
+// ends before the sequence is complete, or (size_t)-1 if the bytes are not
+// well-formed UTF-8 (bad lead or continuation byte, overlong form, surrogate,
+// or a value above U+10FFFF). cp is only written on success.
+// NOTE: mbrtoc16.cpp carries an identical copy; keep the two in sync.
+static size_t decode_utf8(const char *s, size_t n, char32_t &cp) {
+  if (n == 0)
+    return static_cast<size_t>(-2);
+
+  const unsigned char lead = static_cast<unsigned char>(s[0]);
+  if (lead < 0x80) {
+    cp = lead;
+    return 1;
+  }
+
+  size_t length;
+  char32_t smallest; // Smallest code point that needs `length` bytes.
+  char32_t value;    // Payload bits gathered so far.
+  if ((lead & 0xE0) == 0xC0) {
+    length = 2;
+    smallest = 0x80;
+    value = lead & 0x1F;
+  } else if ((lead & 0xF0) == 0xE0) {
+    length = 3;
+    smallest = 0x800;
+    value = lead & 0x0F;
+  } else if ((lead & 0xF8) == 0xF0) {
+    length = 4;
+    smallest = 0x10000;
+    value = lead & 0x07;
+  } else {
+    return static_cast<size_t>(-1); // Stray continuation byte or 0xF8-0xFF.
+  }
+
+  for (size_t i = 1; i < length; ++i) {
+    if (i == n)
+      return static_cast<size_t>(-2);
+    const unsigned char byte = static_cast<unsigned char>(s[i]);
+    if ((byte & 0xC0) != 0x80)
+      return static_cast<size_t>(-1);
+    value = (value << 6) | (byte & 0x3F);
+  }
+
+  if (value < smallest || value > 0x10FFFF ||
+      (value >= 0xD800 && value <= 0xDFFF))
+    return static_cast<size_t>(-1);
+
+  cp = value;
+  return length;
+}
+
+// Converts the next UTF-8 sequence in s to a UTF-32 code unit.
+//
+//   pc32 - receives the code point; may be null to discard it.
+//   s    - UTF-8 input; a null s is treated as the empty string "".
+//   n    - number of bytes of s that may be examined.
+//   ps   - conversion state; ignored, because every UTF-8 sequence maps to
+//          exactly one UTF-32 code unit.
+//
+// Returns
+//   0           if the decoded code point is U+0000,
+//   1 to 4      the number of bytes consumed from s,
+//   (size_t)-2  if the first n bytes are an incomplete sequence; nothing is
+//               consumed or remembered, so call again with more input,
+//   (size_t)-1  if the input is not well-formed UTF-8.
+// TODO: set errno to EILSEQ on (size_t)-1, as ISO C11 7.28.1.3 requires.
+size_t LLVM_LIBC_ENTRYPOINT(mbrtoc32)(char32_t *__restrict pc32,
+                                      const char *__restrict s, size_t n,
+                                      mbstate_t *__restrict ps) {
+  (void)ps;
+
+  if (s == nullptr)
+    return 0; // Equivalent to mbrtoc32(NULL, "", 1, ps).
+
+  char32_t cp = 0;
+  const size_t consumed = decode_utf8(s, n, cp);
+  if (consumed == static_cast<size_t>(-1) ||
+      consumed == static_cast<size_t>(-2))
+    return consumed;
+
+  if (pc32 != nullptr)
+    *pc32 = cp;
+  return cp == 0 ? 0 : consumed;
+}
+
+} // namespace __llvm_libc