diff --git a/libc/src/__support/CPP/Limits.h b/libc/src/__support/CPP/Limits.h
--- a/libc/src/__support/CPP/Limits.h
+++ b/libc/src/__support/CPP/Limits.h
@@ -52,6 +52,26 @@
   static constexpr unsigned long long max() { return ULLONG_MAX; }
   static constexpr unsigned long long min() { return 0; }
 };
+template <> class NumericLimits<short> {
+public:
+  static constexpr short max() { return SHRT_MAX; }
+  static constexpr short min() { return SHRT_MIN; }
+};
+template <> class NumericLimits<unsigned short> {
+public:
+  static constexpr unsigned short max() { return USHRT_MAX; }
+  static constexpr unsigned short min() { return 0; }
+};
+template <> class NumericLimits<char> {
+public:
+  static constexpr char max() { return CHAR_MAX; }
+  static constexpr char min() { return CHAR_MIN; }
+};
+template <> class NumericLimits<unsigned char> {
+public:
+  static constexpr unsigned char max() { return UCHAR_MAX; }
+  static constexpr unsigned char min() { return 0; }
+};
 #ifdef __SIZEOF_INT128__
 template <> class NumericLimits<__uint128_t> {
 public:
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -55,6 +55,7 @@
     converter_atlas.h
     string_converter.h
     char_converter.h
+    int_converter.h
   DEPENDS
     .writer
     .core_structs
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -12,7 +12,7 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-void convert_char(Writer *writer, const FormatSection &to_conv) {
+void inline convert_char(Writer *writer, const FormatSection &to_conv) {
   char c = to_conv.conv_val_raw;
 
   if (to_conv.min_width > 1) {
diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -42,7 +42,7 @@
   case 'd':
   case 'i':
   case 'u':
-    // convert_int(writer, to_conv);
+    convert_int(writer, to_conv);
     return;
   case 'o':
     // convert_oct(writer, to_conv);
diff --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
--- a/libc/src/stdio/printf_core/converter_atlas.h
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@@ -20,6 +20,8 @@
 #include "src/stdio/printf_core/char_converter.h"
 
 // defines convert_int
+#include "src/stdio/printf_core/int_converter.h"
+
 // defines convert_oct
 
 // defines convert_hex
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
new file mode 100644
--- /dev/null
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -0,0 +1,188 @@
+//===-- Integer Converter for printf ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+
+#include "src/__support/CPP/Limits.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+// Writes the decimal conversion described by to_conv ('d' and 'i' are signed,
+// 'u' is unsigned) to writer, applying the length modifier, flags, minimum
+// width and precision stored in to_conv.
+void inline convert_int(Writer *writer, const FormatSection &to_conv) {
+  static constexpr size_t BITS_IN_BYTE = 8;
+
+  // This approximates the number of digits it takes to represent an integer of
+  // a certain number of bits. The calculation is floor((bits * 5) / 16)
+  // 32 -> 10 (actually needs 10)
+  // 64 -> 20 (actually needs 20)
+  // 128 -> 40 (actually needs 39)
+  // This estimation grows slightly faster than the actual value, but is close
+  // enough.
+
+  static constexpr size_t BUFF_LEN =
+      ((sizeof(uintmax_t) * BITS_IN_BYTE * 5) / 16);
+  uintmax_t num = to_conv.conv_val_raw;
+  char buffer[BUFF_LEN];
+  bool is_negative = false;
+  FormatFlags flags = to_conv.flags;
+
+  // Select the bits that belong to the argument's type. This has to happen
+  // before the sign is examined, because the value must be converted to the
+  // narrower type first: ("%hhd", 255) -> "-1"
+  uintmax_t mask = ~uintmax_t(0);
+  switch (to_conv.length_modifier) {
+  case LengthModifier::none:
+    mask = cpp::NumericLimits<unsigned int>::max();
+    break;
+  case LengthModifier::l:
+    mask = cpp::NumericLimits<unsigned long>::max();
+    break;
+  case LengthModifier::ll:
+  case LengthModifier::L:
+    mask = cpp::NumericLimits<unsigned long long>::max();
+    break;
+  case LengthModifier::h:
+    mask = cpp::NumericLimits<unsigned short>::max();
+    break;
+  case LengthModifier::hh:
+    mask = cpp::NumericLimits<unsigned char>::max();
+    break;
+  case LengthModifier::z:
+    mask = cpp::NumericLimits<size_t>::max();
+    break;
+  case LengthModifier::t:
+    // We don't have unsigned ptrdiff so uintptr_t is used, since we need an
+    // unsigned type and ptrdiff is usually the same size as a pointer.
+    static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t));
+    mask = cpp::NumericLimits<uintptr_t>::max();
+    break;
+  case LengthModifier::j:
+    // j is intmax, so no mask is necessary.
+    break;
+  }
+  num &= mask;
+
+  if (to_conv.conv_name == 'u') {
+    // These flags are only for signed conversions, so this removes them if the
+    // conversion is unsigned.
+    flags = static_cast<FormatFlags>(
+        flags & ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
+  } else {
+    // The sign bit is the highest bit of the argument's type, which is the only
+    // bit that is set in mask but not in (mask >> 1).
+    const uintmax_t sign_bit = mask ^ (mask >> 1);
+    if ((num & sign_bit) != 0) {
+      is_negative = true;
+      // Two's complement negation, truncated back to the width of the type,
+      // leaves the magnitude of the number.
+      num = (~num + 1) & mask;
+    }
+  }
+
+  // buff_cur can never reach 0, since the buffer is sized to always be able to
+  // contain the whole integer. This means that bounds checking it should be
+  // unnecessary.
+  size_t buff_cur = BUFF_LEN;
+  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 10)
+    buffer[buff_cur - 1] = (num % 10) + '0';
+
+  size_t digits_written = BUFF_LEN - buff_cur;
+
+  char sign_char = 0;
+
+  if (is_negative)
+    sign_char = '-';
+  else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
+    sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
+  else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
+    sign_char = ' ';
+
+  int sign_char_len = (sign_char == 0 ? 0 : 1);
+
+  // These are signed to prevent underflow due to negative values. The eventual
+  // values will always be non-negative.
+  int zeroes;
+  int spaces;
+
+  // Negative precision indicates that it was not specified.
+  if (to_conv.precision < 0) {
+    if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
+        FormatFlags::LEADING_ZEROES) {
+      // If this conv has flag 0 but not - and no specified precision, it's
+      // padded with 0's instead of spaces identically to if precision =
+      // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
+      zeroes = to_conv.min_width - digits_written - sign_char_len;
+      // The default precision of 1 still applies, so a value of 0 (which has
+      // no digits in the buffer) needs at least one zero even if min_width
+      // leaves no room for padding. Example: ("%+01d", 0) -> "+0"
+      if (digits_written < 1 && zeroes < 1)
+        zeroes = 1;
+      if (zeroes < 0)
+        zeroes = 0;
+      spaces = 0;
+    } else if (digits_written < 1) {
+      // If no precision is specified, precision defaults to 1. This means that
+      // if the integer passed to the conversion is 0, a 0 will be printed.
+      // Example: ("%3d", 0) -> "  0"
+      zeroes = 1;
+      spaces = to_conv.min_width - zeroes - sign_char_len;
+    } else {
+      // If there are enough digits to pass over the precision, just write the
+      // number, padded by spaces.
+      zeroes = 0;
+      spaces = to_conv.min_width - digits_written - sign_char_len;
+    }
+  } else {
+    // If precision was specified, possibly write zeroes, and possibly write
+    // spaces. Example: ("%5.4d", 10000) -> "10000"
+    // If the check for if zeroes is negative was not there, spaces would be
+    // incorrectly evaluated as 1.
+    zeroes = to_conv.precision - digits_written; // a negative value means 0
+    if (zeroes < 0)
+      zeroes = 0;
+    spaces = to_conv.min_width - zeroes - digits_written - sign_char_len;
+  }
+  if (spaces < 0)
+    spaces = 0;
+
+  if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
+    // If left justified it goes sign zeroes digits spaces
+    if (sign_char != 0)
+      writer->write(&sign_char, 1);
+    if (zeroes > 0)
+      writer->write_chars('0', zeroes);
+    if (digits_written > 0)
+      writer->write(buffer + buff_cur, digits_written);
+    if (spaces > 0)
+      writer->write_chars(' ', spaces);
+  } else {
+    // Else it goes spaces sign zeroes digits
+    if (spaces > 0)
+      writer->write_chars(' ', spaces);
+    if (sign_char != 0)
+      writer->write(&sign_char, 1);
+    if (zeroes > 0)
+      writer->write_chars('0', zeroes);
+    if (digits_written > 0)
+      writer->write(buffer + buff_cur, digits_written);
+  }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -14,7 +14,7 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-void convert_string(Writer *writer, const FormatSection &to_conv) {
+void inline convert_string(Writer *writer, const FormatSection &to_conv) {
   int string_len = 0;
 
   for (char *cur_str = reinterpret_cast<char *>(to_conv.conv_val_ptr);
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -76,6 +76,159 @@
   ASSERT_STREQ(buff, " beginning is important.");
 }
 
+TEST(LlvmLibcSPrintfTest, IntConv) {
+  char buff[64];
+  int written;
+
+  // Basic Tests.
+
+  written = __llvm_libc::sprintf(buff, "%d", 123);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "123");
+
+  written = __llvm_libc::sprintf(buff, "%i", -456);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "-456");
+
+  // Length Modifier Tests.
+
+  written = __llvm_libc::sprintf(buff, "%hhu", 257); // 0x101
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = __llvm_libc::sprintf(buff, "%hhd", 255); // (signed char)0xff
+  EXPECT_EQ(written, 2);
+  ASSERT_STREQ(buff, "-1");
+
+  written = __llvm_libc::sprintf(buff, "%llu", 18446744073709551615ull);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+  written = __llvm_libc::sprintf(buff, "%tu", ~ptrdiff_t(0));
+  if (sizeof(ptrdiff_t) == 8) {
+    EXPECT_EQ(written, 20);
+    ASSERT_STREQ(buff, "18446744073709551615");
+  } else if (sizeof(ptrdiff_t) == 4) {
+    EXPECT_EQ(written, 10);
+    ASSERT_STREQ(buff, "4294967295");
+  }
+
+  written = __llvm_libc::sprintf(buff, "%lld", -9223372036854775807ll - 1ll);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+  // Min Width Tests.
+
+  written = __llvm_libc::sprintf(buff, "%4d", 789);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, " 789");
+
+  written = __llvm_libc::sprintf(buff, "%2d", 987);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "987");
+
+  // Precision Tests.
+
+  written = __llvm_libc::sprintf(buff, "%d", 0);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = __llvm_libc::sprintf(buff, "%.0d", 0);
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ(buff, "");
+
+  written = __llvm_libc::sprintf(buff, "%.5d", 654);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00654");
+
+  written = __llvm_libc::sprintf(buff, "%.5d", -321);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "-00321");
+
+  written = __llvm_libc::sprintf(buff, "%.2d", 135);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "135");
+
+  // Flag Tests.
+
+  written = __llvm_libc::sprintf(buff, "%.5d", -321);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "-00321");
+
+  written = __llvm_libc::sprintf(buff, "%-5d", 246);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "246  ");
+
+  written = __llvm_libc::sprintf(buff, "%-5d", -147);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "-147 ");
+
+  written = __llvm_libc::sprintf(buff, "%+d", 258);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "+258");
+
+  written = __llvm_libc::sprintf(buff, "% d", 369);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, " 369");
+
+  written = __llvm_libc::sprintf(buff, "%05d", 470);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00470");
+
+  written = __llvm_libc::sprintf(buff, "%05d", -581);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "-0581");
+
+  // Combined Tests.
+
+  written = __llvm_libc::sprintf(buff, "%+ u", 692);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "692");
+
+  written = __llvm_libc::sprintf(buff, "%+ -05d", 703);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "+703 ");
+
+  written = __llvm_libc::sprintf(buff, "%+01d", 0);
+  EXPECT_EQ(written, 2);
+  ASSERT_STREQ(buff, "+0");
+
+  written = __llvm_libc::sprintf(buff, "%7.5d", 814);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "  00814");
+
+  written = __llvm_libc::sprintf(buff, "%7.5d", -925);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, " -00925");
+
+  written = __llvm_libc::sprintf(buff, "%7.5d", 159);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "  00159");
+
+  written = __llvm_libc::sprintf(buff, "% -7.5d", 260);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, " 00260 ");
+
+  written = __llvm_libc::sprintf(buff, "%5.4d", 10000);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "10000");
+
+  // Multiple Conversion Tests.
+
+  written = __llvm_libc::sprintf(buff, "%10d %-10d", 456, -789);
+  EXPECT_EQ(written, 21);
+  ASSERT_STREQ(buff, "       456 -789      ");
+
+  written = __llvm_libc::sprintf(buff, "%-5.4d%+.4u", 75, 25);
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ(buff, "0075 0025");
+
+  written = __llvm_libc::sprintf(buff, "% 05hhi %+-0.5llu %-+ 06.3zd",
+                                 256 + 127, 68719476736ll, size_t(2));
+  EXPECT_EQ(written, 24);
+  ASSERT_STREQ(buff, " 0127 68719476736 +002  ");
+}
+
 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
 TEST(LlvmLibcSPrintfTest, IndexModeParsing) {
   char buff[64];