Changeset View
Standalone View
clang/lib/Format/FormatToken.h
Show First 20 Lines • Show All 134 Lines • ▼ Show 20 Lines | |||||
enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; | enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; | ||||
class TokenRole; | class TokenRole; | ||||
class AnnotatedLine; | class AnnotatedLine; | ||||
/// A wrapper around a \c Token storing information about the | /// A wrapper around a \c Token storing information about the | ||||
/// whitespace characters preceding it. | /// whitespace characters preceding it. | ||||
struct FormatToken { | struct FormatToken { | ||||
FormatToken() {} | FormatToken() | ||||
: HasUnescapedNewline(false), IsMultiline(false), IsFirst(false), | |||||
MustBreakBefore(false), MustBreakAlignBefore(false), | |||||
IsUnterminatedLiteral(false), CanBreakBefore(false), | |||||
ClosesTemplateDeclaration(false), StartsBinaryExpression(false), | |||||
EndsBinaryExpression(false), PartOfMultiVariableDeclStmt(false), | |||||
ContinuesLineCommentSection(false), Finalized(false), | |||||
BlockKind(BK_Unknown), Type(TT_Unknown), Decision(FD_Unformatted), | |||||
PackingKind(PPK_Inconclusive) {} | |||||
MyDeveloperDay: I much prefer putting the initialization here; I think it makes it MUCH clearer. | |||||
riccibruno: It is necessary anyway, since a bit-field cannot have a default member initializer before C++20. | |||||
/// The \c Token. | /// The \c Token. | ||||
Token Tok; | Token Tok; | ||||
/// The number of newlines immediately before the \c Token. | /// The raw text of the token. | ||||
/// | /// | ||||
/// This can be used to determine what the user wrote in the original code | /// Contains the raw token text without leading whitespace and without leading | ||||
/// and thereby e.g. leave an empty line between two function definitions. | /// escaped newlines. | ||||
unsigned NewlinesBefore = 0; | StringRef TokenText; | ||||
/// Whether there is at least one unescaped newline before the \c | /// A token can have a special role that can carry extra information | ||||
/// Token. | /// about the token's formatting. | ||||
bool HasUnescapedNewline = false; | std::unique_ptr<TokenRole> Role; | ||||
/// The range of the whitespace immediately preceding the \c Token. | /// The range of the whitespace immediately preceding the \c Token. | ||||
SourceRange WhitespaceRange; | SourceRange WhitespaceRange; | ||||
/// The offset just past the last '\n' in this token's leading | /// Whether there is at least one unescaped newline before the \c | ||||
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. | /// Token. | ||||
unsigned LastNewlineOffset = 0; | unsigned HasUnescapedNewline : 1; | ||||
/// The width of the non-whitespace parts of the token (or its first | |||||
/// line for multi-line tokens) in columns. | |||||
/// We need this to correctly measure number of columns a token spans. | |||||
unsigned ColumnWidth = 0; | |||||
/// Contains the width in columns of the last line of a multi-line | |||||
/// token. | |||||
unsigned LastLineColumnWidth = 0; | |||||
/// Whether the token text contains newlines (escaped or not). | /// Whether the token text contains newlines (escaped or not). | ||||
bool IsMultiline = false; | unsigned IsMultiline : 1; | ||||
/// Indicates that this is the first token of the file. | /// Indicates that this is the first token of the file. | ||||
bool IsFirst = false; | unsigned IsFirst : 1; | ||||
MyDeveloperDay: Educate me: why
```
unsigned IsFirst : 1;
```
here and not
```
bool IsFirst : 1;
```
Are the two equivalent? (I'm literally not sure myself.) I wrote a little test just to remind myself how this stuff works:
```
#include <iostream>

class Foo {
public:
  Foo() : A(true), B(false), C(true) {}
  bool A : 1;
  bool B : 1;
  bool C : 1;
};

class Bar {
public:
  Bar() : A(true), B(false), C(true) {}
  unsigned A : 1;
  unsigned B : 1;
  unsigned C : 1;
};

class Fuz {
public:
  Fuz() : A(true), B(false), C(true) {}
  bool A;
  bool B;
  bool C;
};

class Baz {
public:
  Baz() : A(true), B(false), C(true) {}
  unsigned A;
  unsigned B;
  unsigned C;
};

int main(int argc, char *argv[]) {
  std::cerr << "Foo " << sizeof(Foo) << "\n";
  std::cerr << "Bar " << sizeof(Bar) << "\n";
  std::cerr << "Fuz " << sizeof(Fuz) << "\n";
  std::cerr << "Baz " << sizeof(Baz) << "\n";
  return 0;
}
```
When run, it gives the following:
```
Foo 1
Bar 4
Fuz 3
Baz 12
```
So I guess my question is: could there be more space savings from using `bool IsFirst : 1` (and likewise for the others)? I'd also think that would help clarity a little (or did I misunderstand?). | |||||
riccibruno: It has to do with the ABI. As per [class.bit]p1:
> [...] Allocation of bit-fields within a class object is implementation-defined. Alignment of bit-fields is implementation-defined. Bit-fields are packed into some addressable allocation unit. [Note: Bit-fields straddle allocation units on some machines and not on others. Bit-fields are assigned right-to-left on some machines, left-to-right on others. — end note ]

Now, the two relevant ABIs are the Itanium ABI (see https://github.com/itanium-cxx-abi/cxx-abi/blob/master/abi-layout.html for the details) and the MS ABI (say, on x86-64). Happily, both are supported by clang, so we can use `-fdump-record-layouts` and compare them.

Consider `S0` in https://godbolt.org/z/orYv5j (Itanium on the left, MS on the right): neither ABI will let a bit-field cross a boundary. Therefore `c0` and `c1` will use 10 bits. If the type had been `short` instead of `char`, then only 9 bits would have been used. The size of `S0` would still have been 2 in both cases.

Consider now `S1`. The MS ABI, unlike the Itanium ABI, will not put bit-fields together if their types have a different size. Therefore `sizeof(S1)` is 2 under the Itanium ABI and 4 under the MS ABI.

Using `unsigned` systematically avoids having a boundary every 8 bits, and avoids the issue with the MS ABI. | |||||
/// Whether there must be a line break before this token. | /// Whether there must be a line break before this token. | ||||
/// | /// | ||||
/// This happens for example when a preprocessor directive ended directly | /// This happens for example when a preprocessor directive ended directly | ||||
/// before the token. | /// before the token. | ||||
bool MustBreakBefore = false; | unsigned MustBreakBefore : 1; | ||||
/// Whether to not align across this token | /// Whether to not align across this token | ||||
/// | /// | ||||
/// This happens for example when a preprocessor directive ended directly | /// This happens for example when a preprocessor directive ended directly | ||||
/// before the token, but very rarely otherwise. | /// before the token, but very rarely otherwise. | ||||
bool MustBreakAlignBefore = false; | unsigned MustBreakAlignBefore : 1; | ||||
/// The raw text of the token. | /// Set to \c true if this token is an unterminated literal. | ||||
unsigned IsUnterminatedLiteral : 1; | |||||
/// \c true if it is allowed to break before this token. | |||||
unsigned CanBreakBefore : 1; | |||||
/// \c true if this is the ">" of "template<..>". | |||||
unsigned ClosesTemplateDeclaration : 1; | |||||
/// \c true if this token starts a binary expression, i.e. has at least | |||||
/// one fake l_paren with a precedence greater than prec::Unknown. | |||||
unsigned StartsBinaryExpression : 1; | |||||
/// \c true if this token ends a binary expression. | |||||
unsigned EndsBinaryExpression : 1; | |||||
/// Is this token part of a \c DeclStmt defining multiple variables? | |||||
/// | /// | ||||
/// Contains the raw token text without leading whitespace and without leading | /// Only set if \c Type == \c TT_StartOfName. | ||||
/// escaped newlines. | unsigned PartOfMultiVariableDeclStmt : 1; | ||||
StringRef TokenText; | |||||
/// Set to \c true if this token is an unterminated literal. | /// Does this line comment continue a line comment section? | ||||
bool IsUnterminatedLiteral = 0; | /// | ||||
/// Only set to true if \c Type == \c TT_LineComment. | |||||
unsigned ContinuesLineCommentSection : 1; | |||||
/// If \c true, this token has been fully formatted (indented and | |||||
/// potentially re-formatted inside), and we do not allow further formatting | |||||
/// changes. | |||||
unsigned Finalized : 1; | |||||
private: | |||||
/// Contains the kind of block if this token is a brace. | /// Contains the kind of block if this token is a brace. | ||||
BraceBlockKind BlockKind = BK_Unknown; | unsigned BlockKind : 2; | ||||
public: | |||||
BraceBlockKind getBlockKind() const { | |||||
return static_cast<BraceBlockKind>(BlockKind); | |||||
} | |||||
void setBlockKind(BraceBlockKind BBK) { | |||||
BlockKind = BBK; | |||||
assert(getBlockKind() == BBK && "BraceBlockKind overflow!"); | |||||
} | |||||
private: | |||||
unsigned Type : 8; | |||||
public: | |||||
/// Returns the token's type, e.g. whether "<" is a template opener or | /// Returns the token's type, e.g. whether "<" is a template opener or | ||||
/// binary operator. | /// binary operator. | ||||
TokenType getType() const { return Type; } | TokenType getType() const { return static_cast<TokenType>(Type); } | ||||
void setType(TokenType T) { Type = T; } | void setType(TokenType T) { | ||||
Type = T; | |||||
assert(getType() == T && "TokenType overflow!"); | |||||
} | |||||
/// The number of spaces that should be inserted before this token. | private: | ||||
unsigned SpacesRequiredBefore = 0; | /// Stores the formatting decision for the token once it was made. | ||||
unsigned Decision : 2; | |||||
/// \c true if it is allowed to break before this token. | public: | ||||
bool CanBreakBefore = false; | FormatDecision getDecision() const { | ||||
return static_cast<FormatDecision>(Decision); | |||||
} | |||||
void setDecision(FormatDecision D) { | |||||
Decision = D; | |||||
assert(getDecision() == D && "FormatDecision overflow!"); | |||||
} | |||||
/// \c true if this is the ">" of "template<..>". | private: | ||||
bool ClosesTemplateDeclaration = false; | /// If this is an opening parenthesis, how are the parameters packed? | ||||
unsigned PackingKind : 2; | |||||
public: | |||||
ParameterPackingKind getPackingKind() const { | |||||
return static_cast<ParameterPackingKind>(PackingKind); | |||||
} | |||||
void setPackingKind(ParameterPackingKind K) { | |||||
PackingKind = K; | |||||
assert(getPackingKind() == K && "ParameterPackingKind overflow!"); | |||||
} | |||||
/// The number of newlines immediately before the \c Token. | |||||
/// | |||||
/// This can be used to determine what the user wrote in the original code | |||||
/// and thereby e.g. leave an empty line between two function definitions. | |||||
unsigned NewlinesBefore = 0; | |||||
/// The offset just past the last '\n' in this token's leading | |||||
/// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. | |||||
unsigned LastNewlineOffset = 0; | |||||
/// The width of the non-whitespace parts of the token (or its first | |||||
/// line for multi-line tokens) in columns. | |||||
/// We need this to correctly measure number of columns a token spans. | |||||
unsigned ColumnWidth = 0; | |||||
/// Contains the width in columns of the last line of a multi-line | |||||
/// token. | |||||
unsigned LastLineColumnWidth = 0; | |||||
/// The number of spaces that should be inserted before this token. | |||||
unsigned SpacesRequiredBefore = 0; | |||||
/// Number of parameters, if this is "(", "[" or "<". | /// Number of parameters, if this is "(", "[" or "<". | ||||
unsigned ParameterCount = 0; | unsigned ParameterCount = 0; | ||||
/// Number of parameters that are nested blocks, | /// Number of parameters that are nested blocks, | ||||
/// if this is "(", "[" or "<". | /// if this is "(", "[" or "<". | ||||
unsigned BlockParameterCount = 0; | unsigned BlockParameterCount = 0; | ||||
/// If this is a bracket ("<", "(", "[" or "{"), contains the kind of | /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of | ||||
/// the surrounding bracket. | /// the surrounding bracket. | ||||
tok::TokenKind ParentBracket = tok::unknown; | tok::TokenKind ParentBracket = tok::unknown; | ||||
/// A token can have a special role that can carry extra information | |||||
/// about the token's formatting. | |||||
std::unique_ptr<TokenRole> Role; | |||||
/// If this is an opening parenthesis, how are the parameters packed? | |||||
ParameterPackingKind PackingKind = PPK_Inconclusive; | |||||
/// The total length of the unwrapped line up to and including this | /// The total length of the unwrapped line up to and including this | ||||
/// token. | /// token. | ||||
unsigned TotalLength = 0; | unsigned TotalLength = 0; | ||||
/// The original 0-based column of this token, including expanded tabs. | /// The original 0-based column of this token, including expanded tabs. | ||||
/// The configured TabWidth is used as tab width. | /// The configured TabWidth is used as tab width. | ||||
unsigned OriginalColumn = 0; | unsigned OriginalColumn = 0; | ||||
Show All 37 Lines | public: | ||||
/// corresponding operator precedence. | /// corresponding operator precedence. | ||||
/// | /// | ||||
/// If multiple fake parentheses start at a token, this vector stores them in | /// If multiple fake parentheses start at a token, this vector stores them in | ||||
/// reverse order, i.e. inner fake parenthesis first. | /// reverse order, i.e. inner fake parenthesis first. | ||||
SmallVector<prec::Level, 4> FakeLParens; | SmallVector<prec::Level, 4> FakeLParens; | ||||
/// Insert this many fake ) after this token for correct indentation. | /// Insert this many fake ) after this token for correct indentation. | ||||
unsigned FakeRParens = 0; | unsigned FakeRParens = 0; | ||||
/// \c true if this token starts a binary expression, i.e. has at least | |||||
/// one fake l_paren with a precedence greater than prec::Unknown. | |||||
bool StartsBinaryExpression = false; | |||||
/// \c true if this token ends a binary expression. | |||||
bool EndsBinaryExpression = false; | |||||
/// If this is an operator (or "."/"->") in a sequence of operators | /// If this is an operator (or "."/"->") in a sequence of operators | ||||
/// with the same precedence, contains the 0-based operator index. | /// with the same precedence, contains the 0-based operator index. | ||||
unsigned OperatorIndex = 0; | unsigned OperatorIndex = 0; | ||||
/// If this is an operator (or "."/"->") in a sequence of operators | /// If this is an operator (or "."/"->") in a sequence of operators | ||||
/// with the same precedence, points to the next operator. | /// with the same precedence, points to the next operator. | ||||
FormatToken *NextOperator = nullptr; | FormatToken *NextOperator = nullptr; | ||||
/// Is this token part of a \c DeclStmt defining multiple variables? | |||||
/// | |||||
/// Only set if \c Type == \c TT_StartOfName. | |||||
bool PartOfMultiVariableDeclStmt = false; | |||||
/// Does this line comment continue a line comment section? | |||||
/// | |||||
/// Only set to true if \c Type == \c TT_LineComment. | |||||
bool ContinuesLineCommentSection = false; | |||||
/// If this is a bracket, this points to the matching one. | /// If this is a bracket, this points to the matching one. | ||||
FormatToken *MatchingParen = nullptr; | FormatToken *MatchingParen = nullptr; | ||||
/// The previous token in the unwrapped line. | /// The previous token in the unwrapped line. | ||||
FormatToken *Previous = nullptr; | FormatToken *Previous = nullptr; | ||||
/// The next token in the unwrapped line. | /// The next token in the unwrapped line. | ||||
FormatToken *Next = nullptr; | FormatToken *Next = nullptr; | ||||
/// If this token starts a block, this contains all the unwrapped lines | /// If this token starts a block, this contains all the unwrapped lines | ||||
/// in it. | /// in it. | ||||
SmallVector<AnnotatedLine *, 1> Children; | SmallVector<AnnotatedLine *, 1> Children; | ||||
/// Stores the formatting decision for the token once it was made. | |||||
FormatDecision Decision = FD_Unformatted; | |||||
/// If \c true, this token has been fully formatted (indented and | |||||
/// potentially re-formatted inside), and we do not allow further formatting | |||||
/// changes. | |||||
bool Finalized = false; | |||||
bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } | bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } | ||||
bool is(TokenType TT) const { return Type == TT; } | bool is(TokenType TT) const { return getType() == TT; } | ||||
bool is(const IdentifierInfo *II) const { | bool is(const IdentifierInfo *II) const { | ||||
return II && II == Tok.getIdentifierInfo(); | return II && II == Tok.getIdentifierInfo(); | ||||
} | } | ||||
bool is(tok::PPKeywordKind Kind) const { | bool is(tok::PPKeywordKind Kind) const { | ||||
return Tok.getIdentifierInfo() && | return Tok.getIdentifierInfo() && | ||||
Tok.getIdentifierInfo()->getPPKeywordID() == Kind; | Tok.getIdentifierInfo()->getPPKeywordID() == Kind; | ||||
} | } | ||||
bool is(BraceBlockKind BBK) const { return getBlockKind() == BBK; } | |||||
bool is(ParameterPackingKind PPK) const { return getPackingKind() == PPK; } | |||||
template <typename A, typename B> bool isOneOf(A K1, B K2) const { | template <typename A, typename B> bool isOneOf(A K1, B K2) const { | ||||
return is(K1) || is(K2); | return is(K1) || is(K2); | ||||
} | } | ||||
template <typename A, typename B, typename... Ts> | template <typename A, typename B, typename... Ts> | ||||
bool isOneOf(A K1, B K2, Ts... Ks) const { | bool isOneOf(A K1, B K2, Ts... Ks) const { | ||||
return is(K1) || isOneOf(K2, Ks...); | return is(K1) || isOneOf(K2, Ks...); | ||||
} | } | ||||
template <typename T> bool isNot(T Kind) const { return !is(Kind); } | template <typename T> bool isNot(T Kind) const { return !is(Kind); } | ||||
bool isIf(bool AllowConstexprMacro = true) const { | bool isIf(bool AllowConstexprMacro = true) const { | ||||
return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || | return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || | ||||
(endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); | (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); | ||||
} | } | ||||
bool closesScopeAfterBlock() const { | bool closesScopeAfterBlock() const { | ||||
if (BlockKind == BK_Block) | if (getBlockKind() == BK_Block) | ||||
return true; | return true; | ||||
if (closesScope()) | if (closesScope()) | ||||
return Previous->closesScopeAfterBlock(); | return Previous->closesScopeAfterBlock(); | ||||
return false; | return false; | ||||
} | } | ||||
/// \c true if this token starts a sequence with the given tokens in order, | /// \c true if this token starts a sequence with the given tokens in order, | ||||
/// following the ``Next`` pointers, ignoring comments. | /// following the ``Next`` pointers, ignoring comments. | ||||
▲ Show 20 Lines • Show All 153 Lines • ▼ Show 20 Lines | while (Tok && Tok->is(tok::comment)) | ||||
Tok = Tok->Next; | Tok = Tok->Next; | ||||
return Tok; | return Tok; | ||||
} | } | ||||
/// Returns \c true if this tokens starts a block-type list, i.e. a | /// Returns \c true if this tokens starts a block-type list, i.e. a | ||||
/// list that should be indented with a block indent. | /// list that should be indented with a block indent. | ||||
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { | bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { | ||||
// C# Does not indent object initialisers as continuations. | // C# Does not indent object initialisers as continuations. | ||||
if (is(tok::l_brace) && BlockKind == BK_BracedInit && Style.isCSharp()) | if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) | ||||
return true; | return true; | ||||
if (is(TT_TemplateString) && opensScope()) | if (is(TT_TemplateString) && opensScope()) | ||||
return true; | return true; | ||||
return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || | return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || | ||||
(is(tok::l_brace) && | (is(tok::l_brace) && | ||||
(BlockKind == BK_Block || is(TT_DictLiteral) || | (getBlockKind() == BK_Block || is(TT_DictLiteral) || | ||||
(!Style.Cpp11BracedListStyle && NestingLevel == 0))) || | (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || | ||||
(is(tok::less) && (Style.Language == FormatStyle::LK_Proto || | (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || | ||||
Style.Language == FormatStyle::LK_TextProto)); | Style.Language == FormatStyle::LK_TextProto)); | ||||
} | } | ||||
/// Returns whether the token is the left square bracket of a C++ | /// Returns whether the token is the left square bracket of a C++ | ||||
/// structured binding declaration. | /// structured binding declaration. | ||||
bool isCppStructuredBinding(const FormatStyle &Style) const { | bool isCppStructuredBinding(const FormatStyle &Style) const { | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | private: | ||||
} | } | ||||
template <typename A, typename... Ts> | template <typename A, typename... Ts> | ||||
bool endsSequenceInternal(A K1, Ts... Tokens) const { | bool endsSequenceInternal(A K1, Ts... Tokens) const { | ||||
if (is(tok::comment) && Previous) | if (is(tok::comment) && Previous) | ||||
return Previous->endsSequenceInternal(K1, Tokens...); | return Previous->endsSequenceInternal(K1, Tokens...); | ||||
return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); | return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); | ||||
} | } | ||||
TokenType Type = TT_Unknown; | |||||
}; | }; | ||||
class ContinuationIndenter; | class ContinuationIndenter; | ||||
struct LineState; | struct LineState; | ||||
class TokenRole { | class TokenRole { | ||||
public: | public: | ||||
TokenRole(const FormatStyle &Style) : Style(Style) {} | TokenRole(const FormatStyle &Style) : Style(Style) {} | ||||
▲ Show 20 Lines • Show All 447 Lines • Show Last 20 Lines |
I much prefer putting the initialization here, I think it makes it MUCH clearer