diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -250,6 +250,9 @@
     # string.h entrypoints that depend on malloc
     libc.src.string.strdup
     libc.src.string.strndup
+
+    # stdio.h entrypoints
+    libc.src.stdio.sprintf
   )
 endif()
 
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -12,6 +12,8 @@
   add_subdirectory(sys)
 endif()
 
+add_subdirectory(stdio)
+
 if(NOT LLVM_LIBC_FULL_BUILD)
   return()
 endif()
@@ -20,7 +22,6 @@
 # since assert uses the signal API, we disable assert also.
 # add_subdirectory(assert)
 # add_subdirectory(signal)
-add_subdirectory(stdio)
 add_subdirectory(threads)
 add_subdirectory(time)
 add_subdirectory(unistd)
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -1,3 +1,26 @@
+add_header_library(
+  printf_conv_core
+  HDRS
+    printf_conv_core.h
+  DEPENDS
+    libc.src.string.memory_utils.memcpy_implementation
+    libc.src.string.memory_utils.memset_implementation
+)
+
+add_header_library(
+  printf_impl
+  HDRS
+    printf_impl.h
+  DEPENDS
+    libc.src.stdio.printf_conv_core
+    libc.src.string.memory_utils.memcpy_implementation
+    libc.src.string.memory_utils.memset_implementation
+    libc.src.string.string_utils
+    libc.src.__support.ctype_utils
+    libc.src.__support.str_to_integer
+    libc.include.stdlib
+)
+
 add_entrypoint_object(
   fwrite
   SRCS
@@ -8,3 +31,13 @@
     libc.src.threads.mtx_lock
     libc.src.threads.mtx_unlock
 )
+
+add_entrypoint_object(
+  sprintf
+  SRCS
+    sprintf.cpp
+  HDRS
+    sprintf.h
+  DEPENDS
+    libc.src.stdio.printf_impl
+)
diff --git a/libc/src/stdio/printf_conv_core.h b/libc/src/stdio/printf_conv_core.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/printf_conv_core.h
@@ -0,0 +1,121 @@
+//===-- Core Definitions for printf conversions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CONV_CORE_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CONV_CORE_H
+
+// #include "src/stdio/FILE.h"
+
+#include "src/string/memory_utils/memcpy_implementations.h"
+#include "src/string/memory_utils/memset_implementations.h"
+
+#include <stddef.h> // For size_t
+
+namespace __llvm_libc {
+namespace internal {
+
+// Sink for formatted output. It counts every character handed to write() but
+// only stores the ones that fit, so callers can report the untruncated length.
+class OutputBuffer {
+private:
+  // Number of characters that may be stored, not counting the terminator.
+  size_t max_size = 0;
+  char *char_buffer = nullptr;
+  // FILE file_buffer;
+  size_t cur_pos = 0;
+  // False if there is nowhere to store even the terminator (zero-sized or null
+  // buffer).
+  bool has_storage = false;
+  enum class BuffType { string, file } buff_type = BuffType::string;
+
+  // Copies as much of new_string as still fits into the string buffer. cur_pos
+  // is advanced by write(), not here.
+  void char_write(const char *new_string, size_t string_length) {
+    if (!has_storage || cur_pos >= max_size)
+      return;
+    const size_t remaining = max_size - cur_pos;
+    const size_t copy_length =
+        string_length < remaining ? string_length : remaining;
+    __llvm_libc::inline_memcpy(char_buffer + cur_pos, new_string, copy_length);
+  }
+
+  void file_write(const char *, size_t) {
+    // TODO(michaelrj): add FILE support.
+  }
+
+public:
+  // initial_buffer must provide initial_max_size bytes; the last one is
+  // reserved for the null terminator.
+  OutputBuffer(size_t initial_max_size, char *initial_buffer)
+      : max_size{initial_max_size > 0 ? initial_max_size - 1 : 0},
+        char_buffer{initial_buffer},
+        has_storage{initial_max_size > 0 && initial_buffer != nullptr} {}
+
+  // TODO(michaelrj): add FILE support.
+  /*
+  OutputBuffer(size_t initial_max_size, FILE initial_buffer)
+      : max_size{initial_max_size - 1}, file_buffer{initial_buffer} {
+    buff_type = BuffType::file;
+  }
+  */
+
+  // Appends string_length characters. Characters that do not fit are dropped
+  // but still counted, so get_bytes_written() reports the full length.
+  void write(const char *new_string, size_t string_length) {
+    if (buff_type == BuffType::string)
+      char_write(new_string, string_length);
+    else
+      file_write(new_string, string_length);
+    cur_pos += string_length;
+  }
+
+  // Number of characters passed to write() so far, including dropped ones.
+  size_t get_bytes_written() const { return cur_pos; }
+
+  // Null-terminates string output without counting the terminator. If the
+  // output was truncated the terminator goes into the reserved final byte, so
+  // the buffer always ends up holding a valid C string.
+  void terminate() {
+    if (buff_type != BuffType::string || !has_storage)
+      return;
+    char_buffer[cur_pos < max_size ? cur_pos : max_size] = '\0';
+  }
+};
+
+enum class LengthModifier { hh, h, l, ll, j, z, t, L, none };
+
+struct FormatFlags {
+  bool left_justified = false;
+  bool force_sign = false;
+  bool space_prefix = false;
+  bool alt_form = false;
+  bool leading_zeroes = false;
+};
+
+// Fills dest[0, final_len) with src padded out to final_len characters; no
+// terminator is written. Padding goes after src when flags.left_justified is
+// set and before it otherwise. It is '0' only when flags.leading_zeroes is set
+// without flags.left_justified, because '-' overrides '0'. If src_len exceeds
+// final_len, only the first final_len characters of src are used.
+static inline void pad_string(const char *src, size_t src_len, size_t final_len,
+                              FormatFlags flags, char *dest) {
+  if (src_len > final_len)
+    src_len = final_len;
+  const char padding_char =
+      (flags.leading_zeroes && !flags.left_justified) ? '0' : ' ';
+  inline_memset(dest, padding_char, final_len);
+  if (flags.left_justified)
+    inline_memcpy(dest, src, src_len);
+  else
+    inline_memcpy(dest + (final_len - src_len), src, src_len);
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CONV_CORE_H
diff --git a/libc/src/stdio/printf_impl.h b/libc/src/stdio/printf_impl.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/printf_impl.h
@@ -0,0 +1,332 @@
+//===-- Internal definition of printf ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_IMPL_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_IMPL_H
+
+#include "src/__support/ctype_utils.h"
+#include "src/__support/str_to_integer.h"
+#include "src/stdio/printf_conv_core.h"
+#include "src/string/memory_utils/memcpy_implementations.h"
+#include "src/string/memory_utils/memset_implementations.h"
+#include "src/string/string_utils.h"
+#include <stdarg.h>
+#include <stddef.h>
+
+#include <stdlib.h>
+
+namespace __llvm_libc {
+namespace internal {
+
+// Writes count space characters to output. A small fixed-size block is reused
+// so that no allocation is needed however large the requested width is.
+static inline void write_spaces(OutputBuffer &output, size_t count) {
+  constexpr size_t BLOCK_SIZE = 32;
+  char spaces[BLOCK_SIZE];
+  inline_memset(spaces, ' ', BLOCK_SIZE);
+  while (count > 0) {
+    const size_t chunk = count < BLOCK_SIZE ? count : BLOCK_SIZE;
+    output.write(spaces, chunk);
+    count -= chunk;
+  }
+}
+
+// Writes the first length characters of str, space padded to at least
+// min_width characters. The padding goes after the text if
+// flags.left_justified is set and before it otherwise.
+static inline void write_padded(OutputBuffer &output, const FormatFlags &flags,
+                                size_t min_width, const char *str,
+                                size_t length) {
+  const size_t padding = min_width > length ? min_width - length : 0;
+  if (!flags.left_justified)
+    write_spaces(output, padding);
+  output.write(str, length);
+  if (flags.left_justified)
+    write_spaces(output, padding);
+}
+
+// Performs the %s conversion. A negative precision means "no limit"; otherwise
+// at most precision characters are read from string_arg, which therefore does
+// not need to be null terminated in that case. A null string_arg is written as
+// "(null)". The '0' flag is ignored, strings are always padded with spaces.
+static inline void string_conversion(OutputBuffer &output, FormatFlags flags,
+                                     size_t min_width, int precision,
+                                     LengthModifier length_modifier,
+                                     char *string_arg) {
+  if (length_modifier == LengthModifier::l) {
+    // TODO(michaelrj): add support for wide characters and add conversion here.
+    return;
+  }
+
+  const char *str = string_arg != nullptr ? string_arg : "(null)";
+  size_t string_len = 0;
+  if (precision < 0) {
+    string_len = internal::string_length(str);
+  } else {
+    // Never look past the precision: the argument may be unterminated.
+    const size_t max_len = static_cast<size_t>(precision);
+    while (string_len < max_len && str[string_len] != '\0')
+      ++string_len;
+  }
+  write_padded(output, flags, min_width, str, string_len);
+}
+
+// Parses and performs one conversion in sequential (non "%n$") mode.
+//
+// format points at the first character after the '%'. Arguments are read from
+// *vlist, which is taken by pointer so that the caller's list stays in step
+// with what was consumed here. On return *spec_len holds the number of
+// characters making up the specification (not counting the '%'), or 0 if the
+// conversion is not recognized, in which case nothing was written.
+static inline void parse_format_specifier_sequential(
+    OutputBuffer &output, const char *__restrict format, va_list *vlist,
+    size_t *spec_len) {
+  FormatFlags flags;
+  size_t min_width = 0;
+  int precision = -1;
+  LengthModifier length_modifier = LengthModifier::none;
+  size_t cur_pos = 0;
+
+  // Handle flags
+  bool found_flag = true;
+  while (found_flag) {
+    switch (format[cur_pos]) {
+    case '-':
+      flags.left_justified = true;
+      break;
+    case '+':
+      flags.force_sign = true;
+      break;
+    case ' ':
+      flags.space_prefix = true;
+      break;
+    case '#':
+      flags.alt_form = true;
+      break;
+    case '0':
+      flags.leading_zeroes = true;
+      break;
+    default:
+      found_flag = false;
+      break;
+    }
+    if (found_flag)
+      ++cur_pos;
+  }
+
+  // Handle width
+  if (format[cur_pos] == '*') {
+    const int width_arg = va_arg(*vlist, int);
+    if (width_arg < 0) {
+      // A negative width is a '-' flag followed by a positive width. Negate
+      // as size_t so that INT_MIN cannot overflow.
+      flags.left_justified = true;
+      min_width = (-static_cast<size_t>(width_arg + 1)) + 1;
+    } else {
+      min_width = static_cast<size_t>(width_arg);
+    }
+    ++cur_pos;
+  } else if (isdigit(format[cur_pos])) {
+    char *int_end = nullptr;
+    min_width = strtointeger<int>(format + cur_pos, &int_end, 10);
+    cur_pos = int_end - format;
+  }
+
+  // Handle precision
+  if (format[cur_pos] == '.') {
+    // A lone '.' means a precision of zero.
+    precision = 0;
+    ++cur_pos;
+    if (format[cur_pos] == '*') {
+      precision = va_arg(*vlist, int);
+      ++cur_pos;
+    } else if (isdigit(format[cur_pos])) {
+      char *int_end = nullptr;
+      precision = strtointeger<int>(format + cur_pos, &int_end, 10);
+      cur_pos = int_end - format;
+    }
+    // A negative precision is treated as if it was omitted.
+    if (precision < 0)
+      precision = -1;
+  }
+
+  // Handle length modifiers
+  switch (format[cur_pos]) {
+  case ('l'):
+    if (format[cur_pos + 1] == 'l') {
+      length_modifier = LengthModifier::ll;
+      cur_pos += 2;
+    } else {
+      length_modifier = LengthModifier::l;
+      ++cur_pos;
+    }
+    break;
+  case ('h'):
+    if (format[cur_pos + 1] == 'h') {
+      length_modifier = LengthModifier::hh;
+      cur_pos += 2;
+    } else {
+      length_modifier = LengthModifier::h;
+      ++cur_pos;
+    }
+    break;
+  case ('L'):
+    length_modifier = LengthModifier::L;
+    ++cur_pos;
+    break;
+  case ('j'):
+    length_modifier = LengthModifier::j;
+    ++cur_pos;
+    break;
+  case ('z'):
+    length_modifier = LengthModifier::z;
+    ++cur_pos;
+    break;
+  case ('t'):
+    length_modifier = LengthModifier::t;
+    ++cur_pos;
+    break;
+  default:
+    break;
+  }
+
+  // Handle format specifiers
+  switch (format[cur_pos]) {
+  case ('%'):
+    output.write("%", 1);
+    ++cur_pos;
+    break;
+  case ('s'): {
+    char *string_arg = va_arg(*vlist, char *);
+    string_conversion(output, flags, min_width, precision, length_modifier,
+                      string_arg);
+    ++cur_pos;
+    break;
+  }
+  case ('c'): {
+    const char char_arg = static_cast<char>(va_arg(*vlist, int));
+    // Written directly rather than through string_conversion: a lone char is
+    // not a null terminated string, and '\0' itself is a valid character.
+    // TODO(michaelrj): add support for wide characters (%lc).
+    if (length_modifier != LengthModifier::l)
+      write_padded(output, flags, min_width, &char_arg, 1);
+    ++cur_pos;
+    break;
+  }
+#ifndef LLVM_LIBC_PRINTF_REMOVE_CONV_n
+  case ('n'): {
+    int *out_arg = va_arg(*vlist, int *);
+    if (out_arg != nullptr)
+      *out_arg = static_cast<int>(output.get_bytes_written());
+    ++cur_pos;
+    break;
+  }
+#endif // LLVM_LIBC_PRINTF_REMOVE_CONV_n
+
+  default:
+    // If this state is reached, then an invalid conversion was requested. In
+    // this case printf will just print the format string as a literal, although
+    // this is not required as this behavior is undefined.
+    cur_pos = 0;
+    break;
+  }
+
+  *spec_len = cur_pos;
+}
+
+// Overload kept for callers that hold a va_list by value. It works on a copy,
+// so the caller's list does not advance; prefer the va_list * overload when
+// parsing more than one conversion.
+static inline void parse_format_specifier_sequential(
+    OutputBuffer &output, const char *__restrict format, va_list vlist,
+    size_t *spec_len) {
+  va_list args;
+  va_copy(args, vlist);
+  parse_format_specifier_sequential(output, format, &args, spec_len);
+  va_end(args);
+}
+
+// Formats according to format, reading arguments from vlist, and writes the
+// null terminated result to output. Returns the number of characters the
+// complete output consists of, not counting the terminator. The caller remains
+// responsible for calling va_end on vlist.
+static inline int printf_impl(OutputBuffer &output,
+                              const char *__restrict format, va_list vlist) {
+  enum class ParseMode { undetermined, sequential, index };
+  ParseMode parse_mode = ParseMode::undetermined;
+
+  // Conversions are parsed by a helper. Passing a va_list by value to a
+  // function that calls va_arg leaves the caller's list indeterminate, so the
+  // helper works through a pointer to a local copy instead.
+  va_list args;
+  va_copy(args, vlist);
+
+  // format[end_of_written, cur_pos) is literal text that still has to be
+  // written out.
+  size_t end_of_written = 0;
+  size_t cur_pos = 0;
+  for (; format[cur_pos] != '\0'; ++cur_pos) {
+    if (format[cur_pos] != '%')
+      continue;
+
+    // Flush the literal text in front of the '%'. The '%' itself stays pending
+    // so that an unrecognized conversion is echoed verbatim, exactly once.
+    output.write(format + end_of_written, cur_pos - end_of_written);
+    end_of_written = cur_pos;
+
+    if (parse_mode == ParseMode::undetermined) {
+      // The index mode format always starts with %n$ where n is an integer in
+      // the range [1,{NL_ARGMAX}].
+      // The sequential mode format never has a $ in it
+      // %% can be used regardless of parse mode, and leaves parse_mode in
+      // undetermined.
+      if (format[cur_pos + 1] == '%') {
+        output.write("%", 1);
+        // Skip the second '%' as well.
+        ++cur_pos;
+        end_of_written = cur_pos + 1;
+        continue;
+      }
+
+      // Look for a '$' after the digits that follow the '%', if any.
+      size_t first_after = cur_pos + 1;
+      while (isdigit(format[first_after]))
+        ++first_after;
+      if (first_after > cur_pos + 1 && format[first_after] == '$')
+        parse_mode = ParseMode::index;
+      else
+        parse_mode = ParseMode::sequential;
+    }
+
+    if (parse_mode == ParseMode::sequential) {
+      size_t spec_len = 0;
+      parse_format_specifier_sequential(output, format + cur_pos + 1, &args,
+                                        &spec_len);
+      if (spec_len != 0) {
+        // Advance cur_pos to the last char of the format specifier, because
+        // the loop increments it before looking at the next char. Also move
+        // end_of_written past the specifier, since it was fully handled.
+        cur_pos += spec_len;
+        end_of_written = cur_pos + 1;
+      }
+    } else if (parse_mode == ParseMode::index) {
+      // TODO(michaelrj): add index mode parsing
+    }
+  }
+
+  if (end_of_written < cur_pos)
+    output.write(format + end_of_written, cur_pos - end_of_written);
+  output.terminate();
+  va_end(args);
+  return static_cast<int>(output.get_bytes_written());
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_IMPL_H
diff --git a/libc/src/stdio/sprintf.h b/libc/src/stdio/sprintf.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/sprintf.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for sprintf -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SPRINTF_H
+#define LLVM_LIBC_SRC_STDIO_SPRINTF_H
+
+#include <stdarg.h>
+
+namespace __llvm_libc {
+
+int sprintf(char *__restrict buffer, const char *__restrict format, ...);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SPRINTF_H
diff --git a/libc/src/stdio/sprintf.cpp b/libc/src/stdio/sprintf.cpp
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/sprintf.cpp
@@ -0,0 +1,29 @@
+//===-- Implementation of sprintf -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/sprintf.h"
+#include "src/__support/common.h"
+#include "src/stdio/printf_impl.h"
+#include <stdarg.h>
+#include <stdint.h> // For SIZE_MAX
+
+namespace __llvm_libc {
+
+LLVM_LIBC_FUNCTION(int, sprintf,
+                   (char *__restrict buffer, const char *__restrict format,
+                    ...)) {
+  va_list vlist;
+  va_start(vlist, format);
+  // sprintf has no size limit; the caller guarantees that buffer is big enough.
+  internal::OutputBuffer output_buffer(SIZE_MAX, buffer);
+  const int ret_val = internal::printf_impl(output_buffer, format, vlist);
+  va_end(vlist);
+  return ret_val;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -33,6 +33,7 @@
 add_subdirectory(math)
 add_subdirectory(string)
 add_subdirectory(stdlib)
+add_subdirectory(stdio)
 
 if(${LIBC_TARGET_OS} STREQUAL "linux")
   add_subdirectory(sys)
@@ -46,7 +47,6 @@
 # since assert uses the signal API, we disable assert also.
 # add_subdirectory(assert)
 # add_subdirectory(signal)
-add_subdirectory(stdio)
 add_subdirectory(threads)
 add_subdirectory(time)
 add_subdirectory(unistd)
diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt
--- a/libc/test/src/stdio/CMakeLists.txt
+++ b/libc/test/src/stdio/CMakeLists.txt
@@ -9,3 +9,14 @@
   DEPENDS
     libc.src.stdio.fwrite
 )
+
+
+add_libc_unittest(
+  sprintf_test
+  SUITE
+    libc_stdio_unittests
+  SRCS
+    sprintf_test.cpp
+  DEPENDS
+    libc.src.stdio.sprintf
+)
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
new file mode 100644
--- /dev/null
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -0,0 +1,173 @@
+//===-- Unittests for sprintf ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/sprintf.h"
+#include "utils/UnitTest/Test.h"
+
+TEST(LlvmLibcSPrintfTest, NoConversion) {
+  const char *format = "abcABC123!@#\n";
+  char result[20];
+  __llvm_libc::sprintf(result, format);
+  ASSERT_STREQ(format, result);
+}
+
+TEST(LlvmLibcSPrintfTest, SimpleStringConversion) {
+  const char *input_string = "abcDEF";
+  char result[20];
+  __llvm_libc::sprintf(result, "%s", input_string);
+  ASSERT_STREQ(input_string, result);
+}
+
+TEST(LlvmLibcSPrintfTest, StringConversionWithWidth) {
+  const char *input_string = "abcDEF";
+  char result[20];
+  int return_val = 0;
+
+  return_val = __llvm_libc::sprintf(result, "%10s", input_string);
+  EXPECT_EQ(return_val, 10);
+  ASSERT_STREQ("    abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%-10s", input_string);
+  EXPECT_EQ(return_val, 10);
+  ASSERT_STREQ("abcDEF    ", result);
+
+  return_val = __llvm_libc::sprintf(result, "%*s", 10, input_string);
+  EXPECT_EQ(return_val, 10);
+  ASSERT_STREQ("    abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%*s", -10, input_string);
+  EXPECT_EQ(return_val, 10);
+  ASSERT_STREQ("abcDEF    ", result);
+
+  return_val = __llvm_libc::sprintf(result, "%6s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%3s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%0s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+}
+
+TEST(LlvmLibcSPrintfTest, StringConversionWithPrecision) {
+  const char *input_string = "abcDEF";
+  char result[20];
+  int return_val = 0;
+
+  return_val = __llvm_libc::sprintf(result, "%.10s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.6s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.3s", input_string);
+  EXPECT_EQ(return_val, 3);
+  ASSERT_STREQ("abc", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.1s", input_string);
+  EXPECT_EQ(return_val, 1);
+  ASSERT_STREQ("a", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.0s", input_string);
+  EXPECT_EQ(return_val, 0);
+  ASSERT_STREQ("", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.s", input_string);
+  EXPECT_EQ(return_val, 0);
+  ASSERT_STREQ("", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.*s", 10, input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.*s", 6, input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%.*s", 3, input_string);
+  EXPECT_EQ(return_val, 3);
+  ASSERT_STREQ("abc", result);
+
+  // Negative precisions are ignored
+  return_val = __llvm_libc::sprintf(result, "%.*s", -3, input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+}
+
+TEST(LlvmLibcSPrintfTest, StringConversionWithWidthAndPrecision) {
+  const char *input_string = "abcDEF";
+  char result[20];
+  int return_val = 0;
+
+  return_val = __llvm_libc::sprintf(result, "%10.10s", input_string);
+  EXPECT_EQ(return_val, 10);
+  ASSERT_STREQ("    abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%6.6s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abcDEF", result);
+
+  return_val = __llvm_libc::sprintf(result, "%3.3s", input_string);
+  EXPECT_EQ(return_val, 3);
+  ASSERT_STREQ("abc", result);
+
+  return_val = __llvm_libc::sprintf(result, "%6.3s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("   abc", result);
+
+  return_val = __llvm_libc::sprintf(result, "%-6.3s", input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("abc   ", result);
+
+  return_val = __llvm_libc::sprintf(result, "%*.3s", 6, input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("   abc", result);
+
+  return_val = __llvm_libc::sprintf(result, "%6.*s", 3, input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("   abc", result);
+
+  return_val = __llvm_libc::sprintf(result, "%*.*s", 6, 3, input_string);
+  EXPECT_EQ(return_val, 6);
+  ASSERT_STREQ("   abc", result);
+}
+
+TEST(LlvmLibcSPrintfTest, MultipleConversions) {
+  char result[20];
+  int return_val = 0;
+
+  return_val = __llvm_libc::sprintf(result, "%s-%s", "ab", "cd");
+  EXPECT_EQ(return_val, 5);
+  ASSERT_STREQ("ab-cd", result);
+
+  return_val = __llvm_libc::sprintf(result, "%s%%", "abc");
+  EXPECT_EQ(return_val, 4);
+  ASSERT_STREQ("abc%", result);
+}
+
+TEST(LlvmLibcSPrintfTest, CharConversion) {
+  char result[20];
+  int return_val = 0;
+
+  return_val = __llvm_libc::sprintf(result, "%c", 'a');
+  EXPECT_EQ(return_val, 1);
+  ASSERT_STREQ("a", result);
+
+  return_val = __llvm_libc::sprintf(result, "%3c", 'a');
+  EXPECT_EQ(return_val, 3);
+  ASSERT_STREQ("  a", result);
+
+  return_val = __llvm_libc::sprintf(result, "%-3c", 'a');
+  EXPECT_EQ(return_val, 3);
+  ASSERT_STREQ("a  ", result);
+}