Index: clang/docs/ClangFormatStyleOptions.rst =================================================================== --- clang/docs/ClangFormatStyleOptions.rst +++ clang/docs/ClangFormatStyleOptions.rst @@ -3159,6 +3159,36 @@ +**IntegerLiteralSeparator** (``IntegerLiteralSeparatorStyle``) :versionbadge:`clang-format 16` + Format C++ integer literal separators. + + Nested configuration flags: + + Separator format of C++ integer literals of different bases. + -1: Remove separators. + 0: Leave the literal as is. + >0: Insert separators between digits, starting from the rightmost digit. + + * ``int8_t Binary`` .. code-block:: c++ + + -1: 0b100111101101 + 0: 0b10011'11'0110'1 + 3: 0b100'111'101'101 + 4: 0b1001'1110'1101 + + * ``int8_t Decimal`` .. code-block:: c++ + + -1: 18446744073709550592ull + 0: 184467'440737'0'95505'92ull + 3: 18'446'744'073'709'550'592ull + + * ``int8_t Hex`` .. code-block:: c++ + + -1: 0xDEADBEEFDEADBEEFull + 0: 0xDEAD'BEEF'DE'AD'BEE'Full + 2: 0xDE'AD'BE'EF'DE'AD'BE'EFull + + **JavaImportGroups** (``List of Strings``) :versionbadge:`clang-format 8` A vector of prefixes ordered by the desired groups for Java imports. Index: clang/include/clang/Format/Format.h =================================================================== --- clang/include/clang/Format/Format.h +++ clang/include/clang/Format/Format.h @@ -2388,6 +2388,36 @@ /// \version 3.7 bool IndentWrappedFunctionNames; + /// Separator format of C++ integer literals of different bases. + /// -1: Remove separators. + /// 0: Leave the literal as is. + /// >0: Insert separators between digits, starting from the rightmost digit. 
+ struct IntegerLiteralSeparatorStyle { + /// Digits per group in binary literals (-1 removes the separators, 0 leaves the literal as is). + /// \code + /// -1: 0b100111101101 + /// 0: 0b10011'11'0110'1 + /// 3: 0b100'111'101'101 + /// 4: 0b1001'1110'1101 + /// \endcode + int8_t Binary; + /// Digits per group in decimal literals (-1 removes the separators, 0 leaves the literal as is). + /// \code + /// -1: 18446744073709550592ull + /// 0: 184467'440737'0'95505'92ull + /// 3: 18'446'744'073'709'550'592ull + /// \endcode + int8_t Decimal; + /// Digits per group in hexadecimal literals (-1 removes the separators, 0 leaves the literal as is). + /// \code + /// -1: 0xDEADBEEFDEADBEEFull + /// 0: 0xDEAD'BEEF'DE'AD'BEE'Full + /// 2: 0xDE'AD'BE'EF'DE'AD'BE'EFull + /// \endcode + int8_t Hex; + }; + + /// Format C++ integer literal separators. + /// \version 16 + IntegerLiteralSeparatorStyle IntegerLiteralSeparator; + /// Insert braces after control statements (``if``, ``else``, ``for``, ``do``, /// and ``while``) in C++ unless the control statements are inside macro /// definitions or the braces would enclose preprocessor directives. @@ -4088,6 +4118,10 @@ IndentRequiresClause == R.IndentRequiresClause && IndentWidth == R.IndentWidth && IndentWrappedFunctionNames == R.IndentWrappedFunctionNames && + IntegerLiteralSeparator.Binary == R.IntegerLiteralSeparator.Binary && + IntegerLiteralSeparator.Decimal == + R.IntegerLiteralSeparator.Decimal && + IntegerLiteralSeparator.Hex == R.IntegerLiteralSeparator.Hex && InsertBraces == R.InsertBraces && JavaImportGroups == R.JavaImportGroups && JavaScriptQuotes == R.JavaScriptQuotes && Index: clang/lib/Format/CMakeLists.txt =================================================================== --- clang/lib/Format/CMakeLists.txt +++ clang/lib/Format/CMakeLists.txt @@ -8,6 +8,7 @@ Format.cpp FormatToken.cpp FormatTokenLexer.cpp + IntegerLiteralSeparatorFixer.cpp MacroCallReconstructor.cpp MacroExpander.cpp NamespaceEndCommentsFixer.cpp Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -20,6 +20,7 @@ #include "FormatInternal.h" #include "FormatToken.h" #include "FormatTokenLexer.h" +#include 
"IntegerLiteralSeparatorFixer.h" #include "NamespaceEndCommentsFixer.h" #include "QualifierAlignmentFixer.h" #include "SortJavaScriptImports.h" @@ -335,6 +336,14 @@ } }; +/// Maps the optional "Binary", "Decimal" and "Hex" keys onto the fields of FormatStyle::IntegerLiteralSeparatorStyle. +template <> struct MappingTraits<FormatStyle::IntegerLiteralSeparatorStyle> { + static void mapping(IO &IO, FormatStyle::IntegerLiteralSeparatorStyle &Base) { + IO.mapOptional("Binary", Base.Binary); + IO.mapOptional("Decimal", Base.Decimal); + IO.mapOptional("Hex", Base.Hex); + } +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) { IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave); @@ -881,6 +890,7 @@ Style.IndentWrappedFunctionNames); IO.mapOptional("InsertBraces", Style.InsertBraces); IO.mapOptional("InsertTrailingCommas", Style.InsertTrailingCommas); + IO.mapOptional("IntegerLiteralSeparator", Style.IntegerLiteralSeparator); IO.mapOptional("JavaImportGroups", Style.JavaImportGroups); IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes); IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports); @@ -1335,6 +1345,7 @@ LLVMStyle.IndentWrappedFunctionNames = false; LLVMStyle.InsertBraces = false; LLVMStyle.InsertTrailingCommas = FormatStyle::TCS_None; + LLVMStyle.IntegerLiteralSeparator = {/*Binary=*/0, /*Decimal=*/0, /*Hex=*/0}; LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; LLVMStyle.JavaScriptWrapImports = true; LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; @@ -3392,6 +3403,10 @@ SmallVector Passes; if (Style.isCpp()) { + Passes.emplace_back([&](const Environment &Env) { + return IntegerLiteralSeparatorFixer().process(Env, Expanded); + }); + if (Style.QualifierAlignment != FormatStyle::QAS_Leave) { Passes.emplace_back([&](const Environment &Env) { return QualifierAlignmentFixer(Env, Expanded, Code, Ranges, Index: clang/lib/Format/IntegerLiteralSeparatorFixer.h =================================================================== --- /dev/null +++ clang/lib/Format/IntegerLiteralSeparatorFixer.h @@ -0,0 +1,39 @@ +//===--- 
IntegerLiteralSeparatorFixer.h -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares IntegerLiteralSeparatorFixer that fixes C++ integer +/// literal separators. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H +#define LLVM_CLANG_LIB_FORMAT_INTEGERLITERALSEPARATORFIXER_H + +#include "TokenAnalyzer.h" + +namespace clang { +namespace format { + +/// Rewrites the digit separators of C++ integer literals according to FormatStyle::IntegerLiteralSeparator. +class IntegerLiteralSeparatorFixer { +public: + /// Lexes the file of \p Env and returns the replacements for its integer literals, paired with 0. + std::pair<tooling::Replacements, unsigned> + process(const Environment &Env, const FormatStyle &Style) const; + +private: + /// The C++ digit separator. + static const auto Separator = '\''; + + /// Returns true if the separators of \p IntegerLiteral already split it into groups of \p DigitsPerGroup (> 0) digits counted from the right. + bool checkSeparator(const StringRef IntegerLiteral, int DigitsPerGroup) const; + /// Returns \p IntegerLiteral with its separators removed (\p RemoveSeparator) or placed every \p DigitsPerGroup digits counted from the right. + std::string format(const StringRef IntegerLiteral, int DigitsPerGroup, + bool RemoveSeparator) const; +}; + +} // end namespace format +} // end namespace clang + +#endif Index: clang/lib/Format/IntegerLiteralSeparatorFixer.cpp =================================================================== --- /dev/null +++ clang/lib/Format/IntegerLiteralSeparatorFixer.cpp @@ -0,0 +1,174 @@ +//===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements IntegerLiteralSeparatorFixer that fixes C++ integer +/// literal separators. 
+/// +//===----------------------------------------------------------------------===// + +#include "IntegerLiteralSeparatorFixer.h" + +namespace clang { +namespace format { + +enum class Base { Binary, Decimal, Hex, Other }; + +static Base getBase(const StringRef IntegerLiteral) { + assert(IntegerLiteral.size() > 1); + + if (IntegerLiteral[0] > '0') { + assert(IntegerLiteral[0] <= '9'); + return Base::Decimal; + } + + assert(IntegerLiteral[0] == '0'); + + switch (IntegerLiteral[1]) { + case 'b': + case 'B': + return Base::Binary; + case 'x': + case 'X': + return Base::Hex; + default: + return Base::Other; + } +} + +std::pair +IntegerLiteralSeparatorFixer::process(const Environment &Env, + const FormatStyle &Style) const { + const auto &Option = Style.IntegerLiteralSeparator; + const auto Binary = Option.Binary; + const auto Decimal = Option.Decimal; + const auto Hex = Option.Hex; + const bool SkipBinary = Binary == 0; + const bool SkipDecimal = Decimal == 0; + const bool SkipHex = Hex == 0; + + if (SkipBinary && SkipDecimal && SkipHex) + return {}; + + const auto ID = Env.getFileID(); + const auto &SourceMgr = Env.getSourceManager(); + std::unique_ptr Lex; + Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, + getFormattingLangOpts(Style))); + Lex->SetCommentRetentionState(true); + + Token Tok; + Lex->LexFromRawLexer(Tok); + + tooling::Replacements Result; + for (bool Skip = false; Tok.isNot(tok::eof); Lex->LexFromRawLexer(Tok)) { + auto Length = Tok.getLength(); + if (Length < 2) + continue; + auto Location = Tok.getLocation(); + auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); + if (Tok.is(tok::comment)) { + if (Text == "// clang-format off" || Text == "/* clang-format off */") + Skip = true; + if (Text == "// clang-format on" || Text == "/* clang-format on */") + Skip = false; + continue; + } + if (Skip || Tok.isNot(tok::numeric_constant)) + continue; + if (Text.find_first_of(".eEpP") != StringRef::npos) + continue; + const 
auto B = getBase(Text); + const bool IsBase2 = B == Base::Binary; + const bool IsBase10 = B == Base::Decimal; + const bool IsBase16 = B == Base::Hex; + if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || + (IsBase16 && SkipHex) || B == Base::Other) { + continue; + } + const bool RemoveSeparator = (IsBase2 && Binary == -1) || + (IsBase10 && Decimal == -1) || + (IsBase16 && Hex == -1); + if (RemoveSeparator && Text.find(Separator) == StringRef::npos) + continue; + const auto Start = Text[0] == '0' ? 2 : 0; + auto End = Text.find_first_of("uUlLzZ"); + if (End == StringRef::npos) + End = Text.size(); + Length = End - Start; + Text = Text.substr(Start, Length); + auto DigitsPerGroup = Decimal; + if (IsBase2) + DigitsPerGroup = Binary; + else if (IsBase16) + DigitsPerGroup = Hex; + if (checkSeparator(Text, DigitsPerGroup)) + continue; + if (Start > 0) + Location = Location.getLocWithOffset(Start); + cantFail(Result.add( + tooling::Replacement(SourceMgr, Location, Length, + format(Text, DigitsPerGroup, RemoveSeparator)))); + } + + return {Result, 0}; +} + +bool IntegerLiteralSeparatorFixer::checkSeparator( + const StringRef IntegerLiteral, int DigitsPerGroup) const { + assert(DigitsPerGroup > 0); + + int I = 0; + for (auto C : llvm::reverse(IntegerLiteral)) { + if (C == Separator) { + if (I < DigitsPerGroup) + return false; + I = 0; + } else { + ++I; + if (I == DigitsPerGroup) + return false; + } + } + + return true; +} + +std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, + int DigitsPerGroup, + bool RemoveSeparator) const { + int DigitCount = 0; + for (auto C : IntegerLiteral) + if (C != Separator) + ++DigitCount; + + int Remainder = DigitCount % DigitsPerGroup; + + std::string Formatted; + int I = 0; + for (auto C : IntegerLiteral) { + if (C == Separator) + continue; + if (RemoveSeparator) { + Formatted.push_back(C); + continue; + } + if (I == (Remainder > 0 ? 
Remainder : DigitsPerGroup)) { + Formatted.push_back(Separator); + I = 0; + Remainder = 0; + } + Formatted.push_back(C); + ++I; + } + + return Formatted; +} + +} // namespace format +} // namespace clang