diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -158,6 +158,7 @@
     libc.src.stdlib.abs
     libc.src.stdlib.labs
     libc.src.stdlib.llabs
+    libc.src.stdlib.strtoll
 
     # signal.h entrypoints
     libc.src.signal.raise
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -62,6 +62,7 @@
 def CharPtr : PtrType<CharType>;
 def ConstCharPtr : ConstType<CharPtr>;
 def CharRestrictedPtr : RestrictedPtrType<CharType>;
+def CharRestrictedPtrPtr : RestrictedPtrType<CharPtr>;
 def ConstCharRestrictedPtr : ConstType<CharRestrictedPtr>;
 
 def OnceFlagType : NamedType<"once_flag">;
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -478,6 +478,7 @@
           FunctionSpec<"abs", RetValSpec<IntType>, [ArgSpec<IntType>]>,
           FunctionSpec<"labs", RetValSpec<LongType>, [ArgSpec<LongType>]>,
           FunctionSpec<"llabs", RetValSpec<LongLongType>, [ArgSpec<LongLongType>]>,
+          FunctionSpec<"strtoll", RetValSpec<LongLongType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>, ArgSpec<IntType>]>,
           FunctionSpec<"_Exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
       ]
   >;
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -12,6 +12,16 @@
     ctype_utils.h
 )
 
+add_header_library(
+  str_conv_utils
+  HDRS
+    str_conv_utils.h
+  DEPENDS
+    .ctype_utils
+    libc.include.errno
+    libc.src.errno.__errno_location
+)
+
 add_header_library(
   integer_operations
   HDRS
diff --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_conv_utils.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/str_conv_utils.h
@@ -0,0 +1,134 @@
+//===-- String conversion utils ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC_SUPPORT_STR_CONV_UTILS_H
+#define LIBC_SRC_SUPPORT_STR_CONV_UTILS_H
+
+#include "src/__support/ctype_utils.h"
+#include <errno.h>
+#include <limits.h>
+
+namespace __llvm_libc {
+namespace internal {
+
+// Returns a pointer to the first character in src that is not a whitespace
+// character (as determined by isspace()).
+static inline const char *first_non_whitespace(const char *__restrict src) {
+  while (internal::isspace(*src)) {
+    ++src;
+  }
+  return src;
+}
+
+// Maps an alphanumeric character to its value as a digit in bases up to 36:
+// '0'-'9' give 0-9 and letters of either case give 10-35. Any other character
+// gives 0, so callers must check isalnum() before trusting the result.
+static inline int b36_char_to_int(char input) {
+  if (isdigit(input))
+    return input - '0';
+  if (isalpha(input))
+    return (input | 32) + 10 - 'a';
+  return 0;
+}
+
+// Returns true if src starts with "0x" or "0X" followed by at least one
+// hexadecimal digit. A prefix with no digit after it is not a hexadecimal
+// number: only its leading "0" is part of the subject sequence.
+static inline bool is_hex_start(const char *__restrict src) {
+  return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+         b36_char_to_int(*(src + 2)) < 16;
+}
+
+// Takes the address of the string pointer and parses the base from the start of
+// it. The string pointer is advanced past a hexadecimal prefix only; the
+// leading '0' of an octal number is left in place to be parsed as a digit.
+static inline int infer_base(const char *__restrict *__restrict src) {
+  if (is_hex_start(*src)) {
+    (*src) += 2;
+    return 16;
+  }
+  if (**src == '0')
+    return 8;
+  return 10;
+}
+
+// Takes a pointer to a string, a pointer to a string pointer, and the base to
+// convert to. This function is used as the backend for all of the string to int
+// functions.
+//
+// Leading whitespace and an optional sign are skipped, then as many digits as
+// are valid in the base are consumed. If base is 0 it is inferred from the
+// prefix. On return *str_end (when str_end is not null) points just past the
+// last digit consumed, or at the start of the input if no digit was consumed.
+// Out of range values set errno to ERANGE and return LLONG_MAX or LLONG_MIN
+// depending on the sign. An invalid base sets errno to EINVAL and returns 0.
+static inline long long strtoll(const char *__restrict src,
+                                char **__restrict str_end, int base) {
+  const char *const original_src = src;
+  unsigned long long result = 0;
+
+  if (base < 0 || base == 1 || base > 36) {
+    errno = EINVAL; // NOLINT
+    return 0;
+  }
+
+  src = first_non_whitespace(src);
+
+  char result_sign = '+';
+  if (*src == '+' || *src == '-') {
+    result_sign = *src;
+    ++src;
+  }
+
+  if (base == 0) {
+    base = infer_base(&src);
+  } else if (base == 16 && is_hex_start(src)) {
+    src = src + 2;
+  }
+
+  unsigned long long const ABS_MAX =
+      (result_sign == '+' ? LLONG_MAX
+                          : static_cast<unsigned long long>(LLONG_MAX) + 1);
+  unsigned long long const ABS_MAX_DIV_BY_BASE = ABS_MAX / base;
+  bool is_number = false;
+  bool overflowed = false;
+  while (isalnum(*src)) {
+    int cur_digit = b36_char_to_int(*src);
+    if (cur_digit >= base)
+      break;
+
+    is_number = true;
+    ++src;
+
+    // Once the value is out of range the result is pinned, but the remaining
+    // digits still belong to the number and have to be consumed.
+    if (overflowed)
+      continue;
+
+    if (result > ABS_MAX_DIV_BY_BASE || result * base > ABS_MAX - cur_digit) {
+      overflowed = true;
+      result = ABS_MAX;
+      errno = ERANGE; // NOLINT
+    } else {
+      result = result * base + cur_digit;
+    }
+  }
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(is_number ? src : original_src);
+
+  if (result_sign == '+')
+    return result;
+  else
+    return -result;
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_SUPPORT_STR_CONV_UTILS_H
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -50,3 +50,13 @@
   DEPENDS
     libc.src.__support.integer_operations
 )
+
+add_entrypoint_object(
+  strtoll
+  SRCS
+    strtoll.cpp
+  HDRS
+    strtoll.h
+  DEPENDS
+    libc.src.__support.str_conv_utils
+)
diff --git a/libc/src/stdlib/strtoll.h b/libc/src/stdlib/strtoll.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdlib/strtoll.h
@@ -0,0 +1,19 @@
+//===-- Implementation header for strtoll -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+#define LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+
+namespace __llvm_libc {
+
+long long strtoll(const char *__restrict str, char **__restrict str_end,
+                  int base);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRTOLL_H
diff --git a/libc/src/stdlib/strtoll.cpp b/libc/src/stdlib/strtoll.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdlib/strtoll.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of strtoll -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+#include "src/__support/common.h"
+#include "src/__support/str_conv_utils.h"
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(long long, strtoll,
+                   (const char *__restrict str, char **__restrict str_end,
+                    int base)) {
+  return internal::strtoll(str, str_end, base);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -54,3 +54,15 @@
   DEPENDS
     libc.src.stdlib.llabs
 )
+
+add_libc_unittest(
+  strtoll_test
+  SUITE
+    libc_stdlib_unittests
+  SRCS
+    strtoll_test.cpp
+  DEPENDS
+    libc.src.stdlib.strtoll
+    libc.include.errno
+    libc.test.errno_setter_matcher
+)
diff --git a/libc/test/src/stdlib/strtoll_test.cpp b/libc/test/src/stdlib/strtoll_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/stdlib/strtoll_test.cpp
@@ -0,0 +1,303 @@
+//===-- Unittests for strtoll ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+
+#include "utils/UnitTest/Test.h"
+
+#include <errno.h>
+#include <limits.h>
+
+TEST(LlvmLibcStrToLLTest, InvalidBase) {
+  const char *ten = "10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(ten, nullptr, -1), 0ll);
+  ASSERT_EQ(errno, EINVAL);
+}
+
+TEST(LlvmLibcStrToLLTest, CleanBaseTenDecode) {
+  char *str_end = nullptr;
+
+  const char *ten = "10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(ten, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - ten, 2l);
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(ten, nullptr, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+
+  const char *hundred = "100";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(hundred, &str_end, 10), 100ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - hundred, 3l);
+
+  const char *negative = "-100";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(negative, &str_end, 10), -100ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - negative, 4l);
+
+  const char *big_number = "123456789012345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(big_number, &str_end, 10), 123456789012345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - big_number, 15l);
+
+  const char *big_negative_number = "-123456789012345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(big_negative_number, &str_end, 10),
+            -123456789012345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - big_negative_number, 16l);
+
+  const char *too_big_number = "123456789012345678901";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(too_big_number, &str_end, 10), LLONG_MAX);
+  ASSERT_EQ(errno, ERANGE);
+  EXPECT_EQ(str_end - too_big_number, 21l);
+
+  const char *too_big_negative_number = "-123456789012345678901";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(too_big_negative_number, &str_end, 10),
+            LLONG_MIN);
+  ASSERT_EQ(errno, ERANGE);
+  EXPECT_EQ(str_end - too_big_negative_number, 22l);
+}
+
+TEST(LlvmLibcStrToLLTest, MessyBaseTenDecode) {
+  char *str_end = nullptr;
+
+  const char *spaces_before = "     10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(spaces_before, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - spaces_before, 7l);
+
+  const char *spaces_after = "10      ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(spaces_after, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - spaces_after, 2l);
+
+  const char *word_before = "word10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(word_before, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - word_before, 0l);
+
+  const char *word_after = "10word";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(word_after, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - word_after, 2l);
+
+  const char *two_numbers = "10 999";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(two_numbers, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - two_numbers, 2l);
+
+  const char *two_signs = "--10 999";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(two_signs, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - two_signs, 0l);
+
+  const char *sign_before = "+2=4";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(sign_before, &str_end, 10), 2ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - sign_before, 2l);
+
+  const char *sign_after = "2+2=4";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(sign_after, &str_end, 10), 2ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - sign_after, 1l);
+
+  const char *tab_before = "\t10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(tab_before, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - tab_before, 3l);
+
+  const char *all_together = "\t  -12345and+67890";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(all_together, &str_end, 10), -12345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - all_together, 9l);
+}
+
+static char int_to_b36_char(int input) {
+  if (input < 0 || input > 36)
+    return '0';
+  if (input < 10)
+    return '0' + input;
+  return 'A' + input - 10;
+}
+
+TEST(LlvmLibcStrToLLTest, DecodeInOtherBases) {
+  char small_string[4] = {'\0', '\0', '\0', '\0'};
+  for (int base = 2; base <= 36; ++base) {
+    for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      if (first_digit < base) {
+        errno = 0;
+        ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                  first_digit);
+        ASSERT_EQ(errno, 0);
+      } else {
+        errno = 0;
+        ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+        ASSERT_EQ(errno, 0);
+      }
+    }
+  }
+
+  for (int base = 2; base <= 36; ++base) {
+    for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      for (long long second_digit = 0; second_digit <= 36; ++second_digit) {
+        small_string[1] = int_to_b36_char(second_digit);
+        if (first_digit < base && second_digit < base) {
+          errno = 0;
+          ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                    second_digit + (first_digit * base));
+          ASSERT_EQ(errno, 0);
+        } else if (first_digit < base) {
+          errno = 0;
+          ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                    first_digit);
+          ASSERT_EQ(errno, 0);
+        } else {
+          errno = 0;
+          ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+          ASSERT_EQ(errno, 0);
+        }
+      }
+    }
+  }
+
+  for (int base = 2; base <= 36; ++base) {
+    for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      for (long long second_digit = 0; second_digit <= 36; ++second_digit) {
+        small_string[1] = int_to_b36_char(second_digit);
+        for (long long third_digit = 0; third_digit <= 36; ++third_digit) {
+          small_string[2] = int_to_b36_char(third_digit);
+
+          if (first_digit < base && second_digit < base && third_digit < base) {
+            errno = 0;
+            ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                      third_digit + (second_digit * base) +
+                          (first_digit * base * base));
+            ASSERT_EQ(errno, 0);
+          } else if (first_digit < base && second_digit < base) {
+            errno = 0;
+            ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                      second_digit + (first_digit * base));
+            ASSERT_EQ(errno, 0);
+          } else if (first_digit < base) {
+            // if the base is 16 there is a special case for the prefix 0X.
+            // The number is treated as a one digit hexadecimal.
+            if (base == 16 && first_digit == 0 && second_digit == 33) {
+              if (third_digit < base) {
+                errno = 0;
+                ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                          third_digit);
+                ASSERT_EQ(errno, 0);
+              } else {
+                errno = 0;
+                ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                          0ll);
+                ASSERT_EQ(errno, 0);
+              }
+            } else {
+              errno = 0;
+              ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                        first_digit);
+              ASSERT_EQ(errno, 0);
+            }
+          } else {
+            errno = 0;
+            ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+            ASSERT_EQ(errno, 0);
+          }
+        }
+      }
+    }
+  }
+}
+
+TEST(LlvmLibcStrToLLTest, CleanBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *no_prefix = "123abc";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(no_prefix, &str_end, 16), 0x123abcll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - no_prefix, 6l);
+
+  const char *yes_prefix = "0x456def";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(yes_prefix, &str_end, 16), 0x456defll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - yes_prefix, 8l);
+}
+
+TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {
+  char *str_end = nullptr;
+
+  const char *base_ten = "12345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_ten, &str_end, 0), 12345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_ten, 5l);
+
+  const char *base_sixteen_no_prefix = "123abc";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_sixteen_no_prefix, &str_end, 0), 123ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_sixteen_no_prefix, 3l);
+
+  const char *base_sixteen_with_prefix = "0x456def";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_sixteen_with_prefix, &str_end, 0),
+            0x456defll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_sixteen_with_prefix, 8l);
+
+  const char *base_eight_with_prefix = "012345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_eight_with_prefix, &str_end, 0),
+            012345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_eight_with_prefix, 6l);
+}
+
+TEST(LlvmLibcStrToLLTest, NoDigitsToConvert) {
+  char *str_end = nullptr;
+
+  const char *only_spaces = "   ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(only_spaces, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - only_spaces, 0l);
+
+  const char *bare_hex_prefix = "0xz";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(bare_hex_prefix, &str_end, 16), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - bare_hex_prefix, 1l);
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(bare_hex_prefix, &str_end, 0), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - bare_hex_prefix, 1l);
+}