diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -158,6 +158,7 @@
     libc.src.stdlib.abs
     libc.src.stdlib.labs
     libc.src.stdlib.llabs
+    libc.src.stdlib.strtoll
 
     # signal.h entrypoints
     libc.src.signal.raise
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -12,6 +12,16 @@
     ctype_utils.h
 )
 
+add_header_library(
+  str_conv_utils
+  HDRS
+    str_conv_utils.h
+  DEPENDS
+    .ctype_utils
+    libc.include.errno
+    libc.src.errno.__errno_location
+)
+
 add_header_library(
   integer_operations
   HDRS
diff --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_conv_utils.h
new file mode 100644
--- /dev/null
+++ b/libc/src/__support/str_conv_utils.h
@@ -0,0 +1,146 @@
+//===-- String to integer conversion utils ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC_SUPPORT_STR_CONV_UTILS_H
+#define LIBC_SRC_SUPPORT_STR_CONV_UTILS_H
+
+#include "src/__support/ctype_utils.h"
+#include "src/errno/llvmlibc_errno.h"
+#include <errno.h>
+#include <limits.h>
+
+namespace __llvm_libc {
+namespace internal {
+
+// Returns a pointer to the first character in src that is not a whitespace
+// character (as determined by isspace()).
+static inline const char *first_non_whitespace(const char *__restrict src) {
+  while (internal::isspace(*src)) {
+    ++src;
+  }
+  return src;
+}
+
+// Maps an alphanumeric character to its value as a digit in bases up to 36:
+// '0'-'9' give 0-9 and letters of either case give 10-35. Any other character
+// gives 0, so callers must check isalnum() before trusting the result.
+static inline int b36_char_to_int(char input) {
+  if (isdigit(input))
+    return input - '0';
+  if (isalpha(input))
+    return (input | 32) + 10 - 'a';
+  return 0;
+}
+
+// Returns true if src starts with "0x" or "0X" followed by at least one
+// hexadecimal digit. A bare "0x" is not a prefix: there the '0' is the number
+// and the 'x' is the first unparsed character.
+static inline bool is_hex_start(const char *__restrict src) {
+  // The checks short-circuit in order, so src[2] is only read once src[0] and
+  // src[1] are known not to be the terminating null.
+  return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
+         b36_char_to_int(*(src + 2)) < 16;
+}
+
+// Takes the address of the string pointer and parses the base from the start of
+// it. The string pointer is advanced past a hexadecimal prefix; a leading '0'
+// that selects octal is left in place, since it is itself a valid octal digit.
+static inline int infer_base(const char *__restrict *__restrict src) {
+  if (is_hex_start(*src)) {
+    (*src) += 2;
+    return 16;
+  }
+  if (**src == '0')
+    return 8;
+  return 10;
+}
+
+// Takes a pointer to a string, a pointer to a string pointer, and the base to
+// convert to. This function is used as the backend for all of the string to int
+// functions.
+//
+// Leading whitespace and one optional sign are skipped, then digits are
+// accumulated until a character that is not a digit in the base is found.
+//   - base must be 0 or in [2, 36]; otherwise errno is set to EINVAL and 0 is
+//     returned without writing to *str_end.
+//   - base 0 infers the base from the prefix: "0x"/"0X" for 16, "0" for 8,
+//     otherwise 10. Base 16 also accepts an optional "0x"/"0X" prefix.
+//   - On overflow errno is set to ERANGE and LLONG_MAX or LLONG_MIN (matching
+//     the sign) is returned.
+//   - If str_end is not null, *str_end is set to where parsing stopped.
+//
+// NOTE(review): two end pointer cases look different from the C standard and
+// should be confirmed: with no digits *str_end is left after the whitespace and
+// sign rather than at the start of src, and on overflow it is left at the digit
+// that overflowed rather than after the whole digit sequence.
+static inline long long strtoll(const char *__restrict src,
+                                char **__restrict str_end, int base) {
+  unsigned long long result = 0;
+
+  if (base < 0 || base == 1 || base > 36) {
+    llvmlibc_errno = EINVAL;
+    return 0;
+  }
+
+  src = first_non_whitespace(src);
+
+  char result_sign = '+';
+  if (*src == '+' || *src == '-') {
+    result_sign = *src;
+    ++src;
+  }
+
+  if (base == 0) {
+    base = infer_base(&src);
+  } else if (base == 16 && is_hex_start(src)) {
+    // Only skip "0x" when a hex digit follows; otherwise the '0' is the number.
+    src = src + 2;
+  }
+
+  // The magnitude limit depends on the sign: LLONG_MIN is one further from zero
+  // than LLONG_MAX.
+  unsigned long long const CUR_LLONG_MAX =
+      (result_sign == '+' ? LLONG_MAX
+                          : static_cast<unsigned long long>(LLONG_MAX) + 1);
+  unsigned long long const LLONG_MAX_DIV_BY_BASE = CUR_LLONG_MAX / base;
+  while (isalnum(*src)) {
+    int cur_digit = b36_char_to_int(*src);
+    if (cur_digit >= base)
+      break;
+    // Check before multiplying so that result * base cannot wrap around.
+    if (result > LLONG_MAX_DIV_BY_BASE) {
+      result = CUR_LLONG_MAX;
+      llvmlibc_errno = ERANGE;
+      break;
+    }
+    result = result * base;
+    // Check before adding so that result + cur_digit stays within the limit.
+    if (result > CUR_LLONG_MAX - cur_digit) {
+      result = CUR_LLONG_MAX;
+      llvmlibc_errno = ERANGE;
+      break;
+    }
+    result = result + cur_digit;
+
+    ++src;
+  }
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(src);
+  // Negate while still unsigned: a magnitude of LLONG_MAX + 1 cannot be held
+  // in a long long before the sign is applied.
+  if (result_sign == '+')
+    return static_cast<long long>(result);
+  else
+    return static_cast<long long>(-result);
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_SUPPORT_STR_CONV_UTILS_H
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -50,3 +50,13 @@
   DEPENDS
     libc.src.__support.integer_operations
 )
+
+add_entrypoint_object(
+  strtoll
+  SRCS
+    strtoll.cpp
+  HDRS
+    strtoll.h
+  DEPENDS
+    libc.src.__support.str_conv_utils
+)
diff --git a/libc/src/stdlib/strtoll.h b/libc/src/stdlib/strtoll.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdlib/strtoll.h
@@ -0,0 +1,19 @@
+//===-- Implementation header for strtoll -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+#define LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+
+namespace __llvm_libc {
+
+long long strtoll(const char *__restrict str, char **__restrict str_end,
+                  int base);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRTOLL_H
diff --git a/libc/src/stdlib/strtoll.cpp b/libc/src/stdlib/strtoll.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdlib/strtoll.cpp
@@ -0,0 +1,23 @@
+//===-- Implementation of strtoll -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+#include "src/__support/common.h"
+#include "src/__support/str_conv_utils.h"
+
+namespace __llvm_libc {
+
+// Public strtoll entrypoint. All parsing is delegated to internal::strtoll
+// from str_conv_utils.h; the arguments are forwarded unchanged.
+LLVM_LIBC_FUNCTION(long long, strtoll,
+                   (const char *__restrict str, char **__restrict str_end,
+                    int base)) {
+  return internal::strtoll(str, str_end, base);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -54,3 +54,15 @@
   DEPENDS
     libc.src.stdlib.llabs
 )
+
+add_libc_unittest(
+  strtoll_test
+  SUITE
+    libc_stdlib_unittests
+  SRCS
+    strtoll_test.cpp
+  DEPENDS
+    libc.src.stdlib.strtoll
+    libc.include.errno
+    libc.test.errno_setter_matcher
+)
diff --git a/libc/test/src/stdlib/strtoll_test.cpp b/libc/test/src/stdlib/strtoll_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/stdlib/strtoll_test.cpp
@@ -0,0 +1,242 @@
+//===-- Unittests for strtoll ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+
+#include "test/ErrnoSetterMatcher.h"
+#include "utils/UnitTest/Test.h"
+
+#include <errno.h>
+#include <limits.h>
+
+using __llvm_libc::testing::ErrnoSetterMatcher::Fails;
+using __llvm_libc::testing::ErrnoSetterMatcher::Succeeds;
+
+// A negative base is rejected with EINVAL and a result of 0.
+TEST(LlvmLibcStrToLLTest, InvalidBase) {
+  const char *ten = "10";
+  ASSERT_THAT(__llvm_libc::strtoll(ten, nullptr, -1), Fails(EINVAL, 0));
+}
+
+// Plain base 10 inputs: checks the value and how far str_end advanced.
+TEST(LlvmLibcStrToLLTest, CleanBaseTenDecode) {
+  char *str_end = nullptr;
+
+  const char *ten = "10";
+  ASSERT_THAT(__llvm_libc::strtoll(ten, &str_end, 10), Succeeds(10));
+  EXPECT_EQ(str_end - ten, 2l);
+  ASSERT_THAT(__llvm_libc::strtoll(ten, nullptr, 10), Succeeds(10));
+
+  const char *hundred = "100";
+  ASSERT_THAT(__llvm_libc::strtoll(hundred, &str_end, 10), Succeeds(100));
+  EXPECT_EQ(str_end - hundred, 3l);
+
+  const char *negative = "-100";
+  ASSERT_THAT(__llvm_libc::strtoll(negative, &str_end, 10), Succeeds(-100));
+  EXPECT_EQ(str_end - negative, 4l);
+
+  const char *big_number = "123456789012345";
+  ASSERT_THAT(__llvm_libc::strtoll(big_number, &str_end, 10),
+              Succeeds(123456789012345));
+  EXPECT_EQ(str_end - big_number, 15l);
+
+  const char *big_negative_number = "-123456789012345";
+  ASSERT_THAT(__llvm_libc::strtoll(big_negative_number, &str_end, 10),
+              Succeeds(-123456789012345));
+  EXPECT_EQ(str_end - big_negative_number, 16l);
+
+  // Out of range values fail with ERANGE and clamp to the limit. The expected
+  // str_end offsets are the position of the digit that no longer fit.
+  const char *too_big_number = "123456789012345678901";
+  ASSERT_THAT(__llvm_libc::strtoll(too_big_number, &str_end, 10),
+              Fails(ERANGE, LLONG_MAX));
+  EXPECT_EQ(str_end - too_big_number, 19l);
+
+  const char *too_big_negative_number = "-123456789012345678901";
+  ASSERT_THAT(__llvm_libc::strtoll(too_big_negative_number, &str_end, 10),
+              Fails(ERANGE, LLONG_MIN));
+  EXPECT_EQ(str_end - too_big_negative_number, 20l);
+}
+
+// Base 10 inputs surrounded by whitespace, signs and non-digit characters.
+TEST(LlvmLibcStrToLLTest, MessyBaseTenDecode) {
+  char *str_end = nullptr;
+
+  const char *spaces_before = "     10";
+  ASSERT_THAT(__llvm_libc::strtoll(spaces_before, &str_end, 10), Succeeds(10));
+  EXPECT_EQ(str_end - spaces_before, 7l);
+
+  const char *spaces_after = "10 ";
+  ASSERT_THAT(__llvm_libc::strtoll(spaces_after, &str_end, 10), Succeeds(10));
+  EXPECT_EQ(str_end - spaces_after, 2l);
+
+  const char *word_before = "word10";
+  ASSERT_THAT(__llvm_libc::strtoll(word_before, &str_end, 10), Succeeds(0));
+  EXPECT_EQ(str_end - word_before, 0l);
+
+  const char *word_after = "10word";
+  ASSERT_THAT(__llvm_libc::strtoll(word_after, &str_end, 10), Succeeds(10));
+  EXPECT_EQ(str_end - word_after, 2l);
+
+  const char *two_numbers = "10 999";
+  ASSERT_THAT(__llvm_libc::strtoll(two_numbers, &str_end, 10), Succeeds(10));
+  EXPECT_EQ(str_end - two_numbers, 2l);
+
+  // Only one sign is consumed; the second '-' is not a digit, so the result is
+  // 0 and str_end is expected just after the first sign.
+  const char *two_signs = "--10 999";
+  ASSERT_THAT(__llvm_libc::strtoll(two_signs, &str_end, 10), Succeeds(0));
+  EXPECT_EQ(str_end - two_signs, 1l);
+
+  const char *sign_before = "+2=4";
+  ASSERT_THAT(__llvm_libc::strtoll(sign_before, &str_end, 10), Succeeds(2));
+  EXPECT_EQ(str_end - sign_before, 2l);
+
+  const char *sign_after = "2+2=4";
+  ASSERT_THAT(__llvm_libc::strtoll(sign_after, &str_end, 10), Succeeds(2));
+  EXPECT_EQ(str_end - sign_after, 1l);
+
+  const char *tab_before = "\t10";
+  ASSERT_THAT(__llvm_libc::strtoll(tab_before, &str_end, 10), Succeeds(10));
+  EXPECT_EQ(str_end - tab_before, 3l);
+
+  const char *all_together = "\t  -12345and+67890";
+  ASSERT_THAT(__llvm_libc::strtoll(all_together, &str_end, 10),
+              Succeeds(-12345));
+  EXPECT_EQ(str_end - all_together, 9l);
+}
+
+// Maps 0-9 to '0'-'9' and 10-36 to consecutive characters starting at 'A'.
+// 35 gives 'Z'; 36 gives the character after 'Z' ('[' in ASCII), which is not
+// alphanumeric and serves below as a digit that is invalid in every base.
+// Values outside [0, 36] give '0'.
+static char int_to_b36_char(int input) {
+  if (input < 0 || input > 36)
+    return '0';
+  if (input < 10)
+    return '0' + input;
+  return 'A' + input - 10;
+}
+
+// Exhaustively checks every one, two and three character string of base 36
+// digits (plus one invalid character) in every base from 2 to 36. Parsing is
+// expected to stop at the first character that is not a digit in the base.
+TEST(LlvmLibcStrToLLTest, DecodeInOtherBases) {
+  char small_string[4] = {'\0', '\0', '\0', '\0'};
+  for (int base = 2; base <= 36; ++base) {
+    for (int first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      if (first_digit < base) {
+        ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                    Succeeds(first_digit));
+      } else {
+        ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                    Succeeds(0));
+      }
+    }
+  }
+
+  for (int base = 2; base <= 36; ++base) {
+    for (int first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      for (int second_digit = 0; second_digit <= 36; ++second_digit) {
+        small_string[1] = int_to_b36_char(second_digit);
+        if (first_digit < base && second_digit < base) {
+          ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                      Succeeds(second_digit + (first_digit * base)));
+        } else if (first_digit < base) {
+          ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                      Succeeds(first_digit));
+        } else {
+          ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                      Succeeds(0));
+        }
+      }
+    }
+  }
+
+  for (int base = 2; base <= 36; ++base) {
+    for (int first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      for (int second_digit = 0; second_digit <= 36; ++second_digit) {
+        small_string[1] = int_to_b36_char(second_digit);
+        for (int third_digit = 0; third_digit <= 36; ++third_digit) {
+          small_string[2] = int_to_b36_char(third_digit);
+
+          if (first_digit < base && second_digit < base && third_digit < base) {
+            ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                        Succeeds(third_digit + (second_digit * base) +
+                                 (first_digit * base * base)));
+          } else if (first_digit < base && second_digit < base) {
+            ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                        Succeeds(second_digit + (first_digit * base)));
+          } else if (first_digit < base) {
+            // if the base is 16 there is a special case for the prefix 0X.
+            // The number is treated as a one digit hexadecimal.
+            // second_digit == 33 is the character 'X'.
+            if (base == 16 && first_digit == 0 && second_digit == 33) {
+              if (third_digit < base) {
+                ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                            Succeeds(third_digit));
+              } else {
+                ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                            Succeeds(0));
+              }
+            } else {
+              ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                          Succeeds(first_digit));
+            }
+          } else {
+            ASSERT_THAT(__llvm_libc::strtoll(small_string, nullptr, base),
+                        Succeeds(0));
+          }
+        }
+      }
+    }
+  }
+}
+
+// Base 16 accepts input both with and without a 0x prefix.
+TEST(LlvmLibcStrToLLTest, CleanBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *no_prefix = "123abc";
+  ASSERT_THAT(__llvm_libc::strtoll(no_prefix, &str_end, 16),
+              Succeeds(0x123abc));
+  EXPECT_EQ(str_end - no_prefix, 6l);
+
+  const char *yes_prefix = "0x456def";
+  ASSERT_THAT(__llvm_libc::strtoll(yes_prefix, &str_end, 16),
+              Succeeds(0x456def));
+  EXPECT_EQ(str_end - yes_prefix, 8l);
+}
+
+// With base 0 the base comes from the prefix: 0x selects 16, a leading 0
+// selects 8, and anything else is decimal (so "123abc" stops at 'a').
+TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {
+  char *str_end = nullptr;
+
+  const char *base_ten = "12345";
+  ASSERT_THAT(__llvm_libc::strtoll(base_ten, &str_end, 0), Succeeds(12345));
+  EXPECT_EQ(str_end - base_ten, 5l);
+
+  const char *base_sixteen_no_prefix = "123abc";
+  ASSERT_THAT(__llvm_libc::strtoll(base_sixteen_no_prefix, &str_end, 0),
+              Succeeds(123));
+  EXPECT_EQ(str_end - base_sixteen_no_prefix, 3l);
+
+  const char *base_sixteen_with_prefix = "0x456def";
+  ASSERT_THAT(__llvm_libc::strtoll(base_sixteen_with_prefix, &str_end, 0),
+              Succeeds(0x456def));
+  EXPECT_EQ(str_end - base_sixteen_with_prefix, 8l);
+
+  const char *base_eight_with_prefix = "012345";
+  ASSERT_THAT(__llvm_libc::strtoll(base_eight_with_prefix, &str_end, 0),
+              Succeeds(012345));
+  EXPECT_EQ(str_end - base_eight_with_prefix, 6l);
+}