diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -57,9 +57,11 @@ HDRS converter.h converter_atlas.h + converter_utils.h string_converter.h char_converter.h int_converter.h + hex_converter.h DEPENDS .writer .core_structs diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h --- a/libc/src/stdio/printf_core/char_converter.h +++ b/libc/src/stdio/printf_core/char_converter.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H +#include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/writer.h" diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp --- a/libc/src/stdio/printf_core/converter.cpp +++ b/libc/src/stdio/printf_core/converter.cpp @@ -43,7 +43,7 @@ // return convert_oct(writer, to_conv); case 'x': case 'X': - // return convert_hex(writer, to_conv); + return convert_hex(writer, to_conv); // TODO(michaelrj): add a flag to disable float point values here case 'f': case 'F': diff --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h --- a/libc/src/stdio/printf_core/converter_atlas.h +++ b/libc/src/stdio/printf_core/converter_atlas.h @@ -24,6 +24,7 @@ // defines convert_oct // defines convert_hex +#include "src/stdio/printf_core/hex_converter.h" // TODO(michaelrj): add a flag to disable float point values here // defines convert_float_decimal diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h new file mode 100644 --- /dev/null +++ b/libc/src/stdio/printf_core/converter_utils.h @@ -0,0 +1,56 @@ +//===-- Shared Converter Utilities for printf -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_UTILS_H +#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_UTILS_H + +#include "src/__support/CPP/Limits.h" +#include "src/stdio/printf_core/core_structs.h" + +#include +#include + +namespace __llvm_libc { +namespace printf_core { + +inline uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) { + switch (lm) { + case LengthModifier::none: + return num & cpp::NumericLimits::max(); + case LengthModifier::l: + return num & cpp::NumericLimits::max(); + case LengthModifier::ll: + case LengthModifier::L: + return num & cpp::NumericLimits::max(); + case LengthModifier::h: + return num & cpp::NumericLimits::max(); + case LengthModifier::hh: + return num & cpp::NumericLimits::max(); + case LengthModifier::z: + return num & cpp::NumericLimits::max(); + case LengthModifier::t: + // We don't have unsigned ptrdiff so uintptr_t is used, since we need an + // unsigned type and ptrdiff is usually the same size as a pointer. + static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t)); + return num & cpp::NumericLimits::max(); + case LengthModifier::j: + return num; // j is intmax, so no mask is necessary. + } +} + +#define RET_IF_RESULT_NEGATIVE(func) \ + { \ + int result = (func); \ + if (result < 0) \ + return result; \ + } + +} // namespace printf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_UTILS_H diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h --- a/libc/src/stdio/printf_core/core_structs.h +++ b/libc/src/stdio/printf_core/core_structs.h @@ -78,14 +78,6 @@ return true; } }; - -#define RET_IF_RESULT_NEGATIVE(func) \ - { \ - int result = (func); \ - if (result < 0) \ - return result; \ - } - } // namespace printf_core } // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/hex_converter.h b/libc/src/stdio/printf_core/hex_converter.h new file mode 100644 --- /dev/null +++ b/libc/src/stdio/printf_core/hex_converter.h @@ -0,0 +1,133 @@ +//===-- Hexadecimal Converter for printf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H +#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H + +#include "src/stdio/printf_core/converter_utils.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/writer.h" + +#include +#include + +namespace __llvm_libc { +namespace printf_core { + +int convert_hex(Writer *writer, const FormatSection &to_conv) { + // This approximates the number of digits it takes to represent a hexadecimal + // value of a certain number of bits. Each hex digit represents 4 bits, so the + // exact value is the number of bytes multiplied by 2. + static constexpr size_t BUFF_LEN = sizeof(uintmax_t) * 2; + uintmax_t num = to_conv.conv_val_raw; + char buffer[BUFF_LEN]; + + // All of the characters will be defined relative to variable a, which will be + // the appropriate case based on the name of the conversion. + char a; + if (to_conv.conv_name == 'x') + a = 'a'; + else + a = 'A'; + + num = apply_length_modifier(num, to_conv.length_modifier); + + // buff_cur can never reach 0, since the buffer is sized to always be able to + // contain the whole integer. This means that bounds checking it should be + // unnecessary. + size_t buff_cur = BUFF_LEN; + for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 16) + buffer[buff_cur - 1] = + ((num % 16) > 9) ? ((num % 16) - 10 + a) : ((num % 16) + '0'); + + size_t digits_written = BUFF_LEN - buff_cur; + + // these are signed to prevent underflow due to negative values. The eventual + // values will always be non-negative. + int zeroes; + int spaces; + + // prefix is "0x" + int prefix_len; + char prefix[2]; + if ((to_conv.flags & FormatFlags::ALTERNATE_FORM) == + FormatFlags::ALTERNATE_FORM) { + prefix_len = 2; + prefix[0] = '0'; + prefix[1] = a + ('x' - 'a'); + } else { + prefix_len = 0; + prefix[0] = 0; + } + + // negative precision indicates that it was not specified. + if (to_conv.precision < 0) { + if ((to_conv.flags & + (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) == + FormatFlags::LEADING_ZEROES) { + // if this conv has flag 0 but not - and no specified precision, it's + // padded with 0's instead of spaces identically to if precision = + // min_width - (2 if prefix). For example: ("%#04x", 15) -> "0x0f" + zeroes = to_conv.min_width - digits_written - prefix_len; + if (zeroes < 0) + zeroes = 0; + spaces = 0; + } else if (digits_written < 1) { + // if no precision is specified, precision defaults to 1. This means that + // if the integer passed to the conversion is 0, a 0 will be printed. + // Example: ("%3x", 0) -> " 0" + zeroes = 1; + spaces = to_conv.min_width - zeroes - prefix_len; + } else { + // If there are enough digits to pass over the precision, just write the + // number, padded by spaces. + zeroes = 0; + spaces = to_conv.min_width - digits_written - prefix_len; + } + } else { + // if precision was specified, possibly write zeroes, and possibly write + // spaces. Example: ("%5.4x", 0x10000) -> "10000" + // If the check for if zeroes is negative was not there, spaces would be + // incorrectly evaluated as 1. + zeroes = to_conv.precision - digits_written; // a negative value means 0 + if (zeroes < 0) + zeroes = 0; + spaces = to_conv.min_width - zeroes - digits_written - prefix_len; + } + if (spaces < 0) + spaces = 0; + + if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) == + FormatFlags::LEFT_JUSTIFIED) { + // if left justified it goes prefix zeroes digits spaces + if (prefix[0] != 0) + RET_IF_RESULT_NEGATIVE(writer->write(prefix, 2)); + if (zeroes > 0) + RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes)); + if (digits_written > 0) + RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written)); + if (spaces > 0) + RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces)); + } else { + // else it goes spaces prefix zeroes digits + if (spaces > 0) + RET_IF_RESULT_NEGATIVE(writer->write_chars(' ', spaces)); + if (prefix[0] != 0) + RET_IF_RESULT_NEGATIVE(writer->write(prefix, 2)); + if (zeroes > 0) + RET_IF_RESULT_NEGATIVE(writer->write_chars('0', zeroes)); + if (digits_written > 0) + RET_IF_RESULT_NEGATIVE(writer->write(buffer + buff_cur, digits_written)); + } + return 0; +} + +} // namespace printf_core +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_HEX_CONVERTER_H diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h --- a/libc/src/stdio/printf_core/int_converter.h +++ b/libc/src/stdio/printf_core/int_converter.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H -#include "src/__support/CPP/Limits.h" +#include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/writer.h" @@ -52,37 +52,7 @@ } } - switch (to_conv.length_modifier) { - case LengthModifier::none: - num = num & cpp::NumericLimits::max(); - break; - - case LengthModifier::l: - num = num & cpp::NumericLimits::max(); - break; - case LengthModifier::ll: - case LengthModifier::L: - num = num & cpp::NumericLimits::max(); - break; - case LengthModifier::h: - num = num & cpp::NumericLimits::max(); - break; - case LengthModifier::hh: - num = num & cpp::NumericLimits::max(); - break; - case LengthModifier::z: - num = num & cpp::NumericLimits::max(); - break; - case LengthModifier::t: - // We don't have unsigned ptrdiff so uintptr_t is used, since we need an - // unsigned type and ptrdiff is usually the same size as a pointer. - static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t)); - num = num & cpp::NumericLimits::max(); - break; - case LengthModifier::j: - // j is intmax, so no mask is necessary. - break; - } + num = apply_length_modifier(num, to_conv.length_modifier); // buff_cur can never reach 0, since the buffer is sized to always be able to // contain the whole integer. This means that bounds checking it should be diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h --- a/libc/src/stdio/printf_core/string_converter.h +++ b/libc/src/stdio/printf_core/string_converter.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H +#include "src/stdio/printf_core/converter_utils.h" #include "src/stdio/printf_core/core_structs.h" #include "src/stdio/printf_core/writer.h" diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp --- a/libc/test/src/stdio/printf_core/converter_test.cpp +++ b/libc/test/src/stdio/printf_core/converter_test.cpp @@ -195,3 +195,31 @@ ASSERT_STREQ(str, "12345"); ASSERT_EQ(writer.get_chars_written(), 5); } + +// This needs to be switched to the new testing layout, but that's still in the +// int patch so I need to land that first. This is what I get for not keeping my +// patches small and focused. +TEST(LlvmLibcPrintfConverterTest, HexConversion) { + char str[20]; + __llvm_libc::printf_core::StringWriter str_writer(str); + __llvm_libc::printf_core::Writer writer( + reinterpret_cast(&str_writer), + __llvm_libc::printf_core::write_to_string); + + __llvm_libc::printf_core::FormatSection left_justified_conv; + left_justified_conv.has_conv = true; + left_justified_conv.raw_string = "%#018x"; + left_justified_conv.raw_len = 6; + left_justified_conv.conv_name = 'x'; + left_justified_conv.flags = + static_cast<__llvm_libc::printf_core::FormatFlags>( + __llvm_libc::printf_core::FormatFlags::ALTERNATE_FORM | + __llvm_libc::printf_core::FormatFlags::LEADING_ZEROES); + left_justified_conv.min_width = 18; + left_justified_conv.conv_val_raw = 0x123456ab; + __llvm_libc::printf_core::convert(&writer, left_justified_conv); + + str_writer.terminate(); + ASSERT_STREQ(str, "0x00000000123456ab"); + ASSERT_EQ(writer.get_chars_written(), 18); +} diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -221,6 +221,131 @@ ASSERT_STREQ(buff, " 0127 68719476736 +002 "); } +TEST(LlvmLibcSPrintfTest, HexConv) { + char buff[64]; + int written; + + // Basic Tests. + + written = __llvm_libc::sprintf(buff, "%x", 0x123a); + EXPECT_EQ(written, 4); + ASSERT_STREQ(buff, "123a"); + + written = __llvm_libc::sprintf(buff, "%X", 0x456b); + EXPECT_EQ(written, 4); + ASSERT_STREQ(buff, "456B"); + + // Length Modifier Tests. + + written = __llvm_libc::sprintf(buff, "%hhx", 0x10001); + EXPECT_EQ(written, 1); + ASSERT_STREQ(buff, "1"); + + written = __llvm_libc::sprintf(buff, "%llx", 0xffffffffffffffffull); + EXPECT_EQ(written, 16); + ASSERT_STREQ(buff, "ffffffffffffffff"); // ull max + + written = __llvm_libc::sprintf(buff, "%tX", ~ptrdiff_t(0)); + if (sizeof(ptrdiff_t) == 8) { + EXPECT_EQ(written, 16); + ASSERT_STREQ(buff, "FFFFFFFFFFFFFFFF"); + } else if (sizeof(ptrdiff_t) == 4) { + EXPECT_EQ(written, 8); + ASSERT_STREQ(buff, "FFFFFFFF"); + } + + // Min Width Tests. + + written = __llvm_libc::sprintf(buff, "%4x", 0x789); + EXPECT_EQ(written, 4); + ASSERT_STREQ(buff, " 789"); + + written = __llvm_libc::sprintf(buff, "%2X", 0x987); + EXPECT_EQ(written, 3); + ASSERT_STREQ(buff, "987"); + + // Precision Tests. + + written = __llvm_libc::sprintf(buff, "%x", 0); + EXPECT_EQ(written, 1); + ASSERT_STREQ(buff, "0"); + + written = __llvm_libc::sprintf(buff, "%.0x", 0); + EXPECT_EQ(written, 0); + ASSERT_STREQ(buff, ""); + + written = __llvm_libc::sprintf(buff, "%.5x", 0x1F3); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "001f3"); + + written = __llvm_libc::sprintf(buff, "%.2x", 0x135); + EXPECT_EQ(written, 3); + ASSERT_STREQ(buff, "135"); + + // Flag Tests. + + written = __llvm_libc::sprintf(buff, "%-5x", 0x246); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "246 "); + + written = __llvm_libc::sprintf(buff, "%#x", 0xd3f); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "0xd3f"); + + written = __llvm_libc::sprintf(buff, "%#X", 0xE40); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "0XE40"); + + written = __llvm_libc::sprintf(buff, "%05x", 0x470); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "00470"); + + written = __llvm_libc::sprintf(buff, "%0#6x", 0x8c3); + EXPECT_EQ(written, 6); + ASSERT_STREQ(buff, "0x08c3"); + + written = __llvm_libc::sprintf(buff, "%-#6x", 0x5f0); + EXPECT_EQ(written, 6); + ASSERT_STREQ(buff, "0x5f0 "); + + // Combined Tests. + + written = __llvm_libc::sprintf(buff, "%#-07x", 0x703); + EXPECT_EQ(written, 7); + ASSERT_STREQ(buff, "0x703 "); + + written = __llvm_libc::sprintf(buff, "%7.5x", 0x814); + EXPECT_EQ(written, 7); + ASSERT_STREQ(buff, " 00814"); + + written = __llvm_libc::sprintf(buff, "%#9.5X", 0x9d4); + EXPECT_EQ(written, 9); + ASSERT_STREQ(buff, " 0X009D4"); + + written = __llvm_libc::sprintf(buff, "%-7.5x", 0x260); + EXPECT_EQ(written, 7); + ASSERT_STREQ(buff, "00260 "); + + written = __llvm_libc::sprintf(buff, "%5.4x", 0x10000); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "10000"); + + // Multiple Conversion Tests. + + written = __llvm_libc::sprintf(buff, "%10X %-#10x", 0x45b, 0x789); + EXPECT_EQ(written, 21); + ASSERT_STREQ(buff, " 45B 0x789 "); + + written = __llvm_libc::sprintf(buff, "%-5.4x%#.4x", 0x75, 0x25); + EXPECT_EQ(written, 11); + ASSERT_STREQ(buff, "0075 0x0025"); + + written = __llvm_libc::sprintf(buff, "%04hhX %#.5llx %-6.3zX", 256 + 0x7f, + 0x1000000000ll, size_t(2)); + EXPECT_EQ(written, 24); + ASSERT_STREQ(buff, "007F 0x1000000000 002 "); +} + #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE TEST(LlvmLibcSPrintfTest, IndexModeParsing) { char buff[64];