Changeset View
Standalone View
llvm/lib/Support/Format.cpp
- This file was added.
//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// | |||||
// | |||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||||
// See https://llvm.org/LICENSE.txt for license information. | |||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||||
// | |||||
//===----------------------------------------------------------------------===// | |||||
// | |||||
// This file implements the non-template part of Format.h, which is used to | |||||
// provide a type-safe-ish interface to printf-style formatting. | |||||
// | |||||
//===----------------------------------------------------------------------===// | |||||
#include "llvm/Support/Format.h" | |||||
namespace { | |||||
ahatanak: Can you leave comments that explain what these enums mean? | |||||
enum ArgLength { | |||||
AL_ShortShort, | |||||
AL_Short, | |||||
AL_Default, | |||||
AL_Long, | |||||
AL_LongLong, | |||||
AL_IntMax, | |||||
AL_Size, | |||||
AL_Ptrdiff, | |||||
AL_LongDouble, | |||||
AL_End, | |||||
}; | |||||
enum SpecifierChar { | |||||
SC_Int, | |||||
SC_UInt, | |||||
SC_Float, | |||||
SC_Char, | |||||
SC_String, | |||||
SC_VoidPointer, | |||||
SC_Count, | |||||
SC_End, | |||||
}; | |||||
constexpr auto ST_Unknown = llvm::PrintfStyleFormatReader::ST_Unknown; | |||||
constexpr auto ST_WideChar = llvm::PrintfStyleFormatReader::ST_WideChar; | |||||
constexpr auto ST_Int = llvm::PrintfStyleFormatReader::ST_Int; | |||||
constexpr auto ST_UInt = llvm::PrintfStyleFormatReader::ST_UInt; | |||||
constexpr auto ST_Long = llvm::PrintfStyleFormatReader::ST_Long; | |||||
constexpr auto ST_ULong = llvm::PrintfStyleFormatReader::ST_ULong; | |||||
constexpr auto ST_LongLong = llvm::PrintfStyleFormatReader::ST_LongLong; | |||||
constexpr auto ST_ULongLong = llvm::PrintfStyleFormatReader::ST_ULongLong; | |||||
constexpr auto ST_IntMax = llvm::PrintfStyleFormatReader::ST_IntMax; | |||||
constexpr auto ST_UIntMax = llvm::PrintfStyleFormatReader::ST_UIntMax; | |||||
constexpr auto ST_Size = llvm::PrintfStyleFormatReader::ST_Size; | |||||
constexpr auto ST_Ptrdiff = llvm::PrintfStyleFormatReader::ST_Ptrdiff; | |||||
constexpr auto ST_Double = llvm::PrintfStyleFormatReader::ST_Double; | |||||
constexpr auto ST_LongDouble = llvm::PrintfStyleFormatReader::ST_LongDouble; | |||||
constexpr auto ST_CString = llvm::PrintfStyleFormatReader::ST_CString; | |||||
constexpr auto ST_WideCString = llvm::PrintfStyleFormatReader::ST_WideCString; | |||||
constexpr auto ST_VoidPointer = llvm::PrintfStyleFormatReader::ST_VoidPointer; | |||||
constexpr auto ST_Count_Char = llvm::PrintfStyleFormatReader::ST_Count_Char; | |||||
constexpr auto ST_Count_Short = llvm::PrintfStyleFormatReader::ST_Count_Short; | |||||
constexpr auto ST_Count_Int = llvm::PrintfStyleFormatReader::ST_Count_Int; | |||||
Function names should start with a lower case letter. https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly Please fix this and other functions in this patch that start with an upper case letter. ahatanak: Function names should start with a lower case letter.
https://llvm.org/docs/CodingStandards. | |||||
constexpr auto ST_Count_Long = llvm::PrintfStyleFormatReader::ST_Count_Long; | |||||
constexpr auto ST_Count_LongLong = | |||||
llvm::PrintfStyleFormatReader::ST_Count_LongLong; | |||||
constexpr auto ST_Count_IntMax = llvm::PrintfStyleFormatReader::ST_Count_IntMax; | |||||
constexpr auto ST_Count_Size = llvm::PrintfStyleFormatReader::ST_Count_Size; | |||||
constexpr auto ST_Count_Ptrdiff = | |||||
llvm::PrintfStyleFormatReader::ST_Count_Ptrdiff; | |||||
llvm::PrintfStyleFormatReader::SpecifierType SpecifierTable[SC_End][AL_End] = { | |||||
{ | |||||
// SC_Int | |||||
ST_Int, | |||||
ST_Int, | |||||
ST_Int, | |||||
ST_Long, | |||||
ST_LongLong, | |||||
ST_IntMax, | |||||
ST_Size, | |||||
ST_Ptrdiff, | |||||
ST_Unknown, | |||||
}, | |||||
{ | |||||
// SC_UInt | |||||
ST_UInt, | |||||
ST_UInt, | |||||
ST_UInt, | |||||
ST_ULong, | |||||
ST_ULongLong, | |||||
ST_UIntMax, | |||||
ST_Size, | |||||
ST_Ptrdiff, | |||||
ST_Unknown, | |||||
}, | |||||
{ | |||||
// SC_Float | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Double, | |||||
ST_Unknown, | |||||
ahatanakUnsubmitted I think this rejects the combination of length modifier l and floating point conversions (e.g., %lf), but the standard doesn't say that's undefined behavior. It just says it has no effect. ahatanak: I think this rejects the combination of length modifier `l` and floating point conversions (e.g. | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_LongDouble, | |||||
}, | |||||
{ | |||||
// SC_Char | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Int, | |||||
ST_WideChar, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
}, | |||||
{ | |||||
// SC_String | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_CString, | |||||
ST_WideCString, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
}, | |||||
{ | |||||
// SC_VoidPointer | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_VoidPointer, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
ST_Unknown, | |||||
}, | |||||
{ | |||||
// SC_Count | |||||
ST_Count_Char, | |||||
ST_Count_Short, | |||||
ST_Count_Int, | |||||
ST_Count_Long, | |||||
ST_Count_LongLong, | |||||
ST_Count_IntMax, | |||||
ST_Count_Size, | |||||
ST_Count_Ptrdiff, | |||||
ST_Unknown, | |||||
}, | |||||
}; | |||||
const char *next(const char *str, char c) { | |||||
while (*str) { | |||||
ahatanakUnsubmitted Can you use StringRef::find or some other functions that is available instead of writing a loop here? ahatanak: Can you use `StringRef::find` or some other functions that is available instead of writing a… | |||||
fcloutierAuthorUnsubmitted I started off using StringRef and found it really cumbersome compared to using a raw const char *. In retrospective, it's not surprising given that printf was meant for C first. I removed next and replaced its only use with strchr to avoid the loop. fcloutier: I started off using `StringRef` and found it really cumbersome compared to using a raw `const… | |||||
if (*str == c) | |||||
break; | |||||
str++; | |||||
} | |||||
return str; | |||||
} | |||||
} // namespace | |||||
namespace llvm { | |||||
void PrintfStyleFormatReader::RefillSpecifierQueue() { | |||||
Fmt = next(Fmt, '%'); | |||||
if (*Fmt == '\0') { | |||||
SpecifierQueue.push_back(ST_EndOfFormatString); | |||||
return; | |||||
} | |||||
if (*++Fmt == '%') { | |||||
// %% case: skip and try again | |||||
Fmt++; | |||||
RefillSpecifierQueue(); | |||||
return; | |||||
} | |||||
// Push ST_Unknown to SpecifierQueue. If we bail out early, this is what | |||||
// the caller gets. Fill in real specifiers to Specifiers: if we | |||||
// successfully get to the end, then swap Specifiers with SpecifierQueue. | |||||
SpecifierQueue.push_back(ST_Unknown); | |||||
Not Done ReplyInline ActionsThe coding standard recommends using preincrement. A few other places are using postincrement too. https://llvm.org/docs/CodingStandards.html#prefer-preincrement ahatanak: The coding standard recommends using preincrement. A few other places are using postincrement… | |||||
llvm::SmallVector<SpecifierType, 3> Specifiers; | |||||
// skip flags | |||||
ahatanakUnsubmitted Flags are completely ignored here, but isn't the behavior undefined for some combinations of flags and conversions according to the standard (e.g., "%0p")? If it is, can you leave a comment that notes that this code ignores undefined behavior? Or can you leave a FIXME note if you plan to fix this in the future? ahatanak: Flags are completely ignored here, but isn't the behavior undefined for some combinations of… | |||||
fcloutierAuthorUnsubmitted I implemented the missing checks. fcloutier: I implemented the missing checks. | |||||
StringRef FlagChars = "+- #0"; | |||||
while (FlagChars.find(*Fmt) != llvm::StringRef::npos) | |||||
Fmt++; | |||||
// skip width | |||||
if (*Fmt == '*') { | |||||
Specifiers.push_back(ST_Int); | |||||
Fmt++; | |||||
} else | |||||
while (*Fmt >= '0' && *Fmt <= '9') | |||||
Fmt++; | |||||
// skip precision | |||||
if (*Fmt == '.') { | |||||
Fmt++; | |||||
if (*Fmt == '*') { | |||||
Specifiers.push_back(ST_Int); | |||||
Fmt++; | |||||
} else | |||||
while (*Fmt >= '0' && *Fmt <= '9') | |||||
Fmt++; | |||||
} | |||||
// parse length | |||||
bool FoundLength = false; | |||||
ArgLength AL = AL_Default; | |||||
while (!FoundLength) { | |||||
ArgLength NewAL; | |||||
switch (*Fmt) { | |||||
case 'h': | |||||
NewAL = AL_Short; | |||||
break; | |||||
case 'l': | |||||
NewAL = AL_Long; | |||||
break; | |||||
case 'j': | |||||
NewAL = AL_IntMax; | |||||
break; | |||||
case 'z': | |||||
NewAL = AL_Size; | |||||
break; | |||||
case 't': | |||||
NewAL = AL_Ptrdiff; | |||||
break; | |||||
case 'L': | |||||
NewAL = AL_LongDouble; | |||||
break; | |||||
default: | |||||
FoundLength = true; | |||||
continue; | |||||
} | |||||
if (NewAL == AL_Long && AL == AL_Long) | |||||
AL = AL_LongLong; | |||||
else if (NewAL == AL_Short && AL == AL_Short) | |||||
AL = AL_ShortShort; | |||||
else if (AL == AL_Default) | |||||
AL = NewAL; | |||||
else | |||||
return; | |||||
Fmt++; | |||||
} | |||||
// parse specifier | |||||
SpecifierChar SC; | |||||
switch (*Fmt++) { | |||||
case 'd': | |||||
case 'i': | |||||
SC = SC_Int; | |||||
break; | |||||
case 'u': | |||||
case 'o': | |||||
case 'x': | |||||
case 'X': | |||||
SC = SC_UInt; | |||||
break; | |||||
case 'a': | |||||
case 'A': | |||||
case 'e': | |||||
case 'E': | |||||
case 'f': | |||||
case 'F': | |||||
case 'g': | |||||
case 'G': | |||||
SC = SC_Float; | |||||
break; | |||||
case 'c': | |||||
SC = SC_Char; | |||||
break; | |||||
case 's': | |||||
SC = SC_String; | |||||
break; | |||||
case 'p': | |||||
SC = SC_VoidPointer; | |||||
break; | |||||
case 'n': | |||||
SC = SC_Count; | |||||
break; | |||||
default: | |||||
return; | |||||
} | |||||
auto Spec = SpecifierTable[SC][AL]; | |||||
if (Spec == ST_Unknown) | |||||
return; | |||||
if (!SignSensitive) { | |||||
switch (Spec) { | |||||
case ST_UInt: | |||||
Spec = ST_Int; | |||||
break; | |||||
case ST_ULong: | |||||
Spec = ST_Long; | |||||
break; | |||||
case ST_ULongLong: | |||||
Spec = ST_LongLong; | |||||
break; | |||||
case ST_UIntMax: | |||||
Spec = ST_IntMax; | |||||
break; | |||||
default: | |||||
break; | |||||
} | |||||
} | |||||
Specifiers.push_back(Spec); | |||||
std::reverse(Specifiers.begin(), Specifiers.end()); | |||||
std::swap(Specifiers, SpecifierQueue); | |||||
} | |||||
const char *PrintfStyleFormatReader::EnsureCompatible(const char *Expected, | |||||
const char *Fmt) { | |||||
PrintfStyleFormatReader EFR(Expected); | |||||
PrintfStyleFormatReader FFR(Fmt); | |||||
ahatanakUnsubmitted Don't you have to check that the lengths of both lists are equal? What happens if Expected is %d%d and Fmt is %d? ahatanak: Don't you have to check that the lengths of both lists are equal? What happens if `Expected` is… | |||||
fcloutierAuthorUnsubmitted It's not UB to call va_end before you've read each argument of a va_list, so the one thing that matters is that Fmt doesn't have more specifiers than Expected. This is ensured by breaking out of the loop when Fmt runs out of specifiers, regardless of whether Expected still has more. If we run out of Expected specifiers while Fmt still has specifiers, we error out. fcloutier: It's not UB to call `va_end` before you've read each argument of a `va_list`, so the one thing… | |||||
ahatanakUnsubmitted Not Done ReplyInline ActionsIt's not UB, but why would a user want to pass a different number of specifiers? Isn't that an indication of some kind of error? ahatanak: It's not UB, but why would a user want to pass a different number of specifiers? Isn't that an… | |||||
fcloutierAuthorUnsubmitted There's just one use of it, so I'll change it to require the two sequences to have the same length. fcloutier: There's just one use of it, so I'll change it to require the two sequences to have the same… | |||||
while (SpecifierType ST = FFR.NextSpecifier()) | |||||
if (ST == ST_Unknown || ST != EFR.NextSpecifier()) | |||||
return Expected; | |||||
return Fmt; | |||||
} | |||||
} // namespace llvm |
Can you leave comments that explain what these enums mean?