Index: clang-tools-extra/clang-tidy/modernize/CMakeLists.txt =================================================================== --- clang-tools-extra/clang-tidy/modernize/CMakeLists.txt +++ clang-tools-extra/clang-tidy/modernize/CMakeLists.txt @@ -9,6 +9,7 @@ ConcatNestedNamespacesCheck.cpp DeprecatedHeadersCheck.cpp DeprecatedIosBaseAliasesCheck.cpp + IntegralLiteralExpressionMatcher.cpp LoopConvertCheck.cpp LoopConvertUtils.cpp MacroToEnumCheck.cpp Index: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h @@ -0,0 +1,69 @@ +//===--- IntegralLiteralExpressionMatcher.h - clang-tidy ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace clang { +namespace tidy { +namespace modernize { + +// Parses an array of tokens and returns true if they conform to the rules of +// C++ for whole expressions involving integral literals. Follows the operator +// precedence rules of C++. +class IntegralLiteralExpressionMatcher { +public: + IntegralLiteralExpressionMatcher(ArrayRef Tokens) + : Current(Tokens.begin()), End(Tokens.end()) {} + + bool match(); + +private: + bool advance(); + bool consume(tok::TokenKind Kind); + bool nonTerminalChainedExpr( + bool (IntegralLiteralExpressionMatcher::*NonTerminal)(), + const std::function &IsKind); + template + bool nonTerminalChainedExpr( + bool (IntegralLiteralExpressionMatcher::*NonTerminal)()) { + return nonTerminalChainedExpr(NonTerminal, + [](Token Tok) { return Tok.is(Kind); }); + } + template + bool nonTerminalChainedExpr( + bool (IntegralLiteralExpressionMatcher::*NonTerminal)()) { + return nonTerminalChainedExpr( + NonTerminal, [](Token Tok) { return Tok.isOneOf(K1, K2, Ks...); }); + } + + bool unaryOperator(); + bool unaryExpr(); + bool multiplicativeExpr(); + bool additiveExpr(); + bool shiftExpr(); + bool compareExpr(); + bool relationalExpr(); + bool equalityExpr(); + bool andExpr(); + bool exclusiveOrExpr(); + bool inclusiveOrExpr(); + bool logicalAndExpr(); + bool logicalOrExpr(); + bool conditionalExpr(); + bool expr(); + + ArrayRef::iterator Current; + ArrayRef::iterator End; +}; + +} // namespace modernize +} // namespace tidy +} // namespace clang Index: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp @@ -0,0 +1,214 @@ +//===--- IntegralLiteralExpressionMatcher.cpp - clang-tidy ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IntegralLiteralExpressionMatcher.h" + +#include + +namespace clang { +namespace tidy { +namespace modernize { + +// Validate that this literal token is a valid integer literal. A literal token +// could be a floating-point token, which isn't acceptable as a value for an +// enumeration. A floating-point token must either have a decimal point or an +// exponent ('E' or 'P'). +static bool isIntegralConstant(const Token &Token) { + const char *Begin = Token.getLiteralData(); + const char *End = Begin + Token.getLength(); + + // not a hexadecimal floating-point literal + if (Token.getLength() > 2 && Begin[0] == '0' && std::toupper(Begin[1]) == 'X') + return std::none_of(Begin + 2, End, [](char C) { + return C == '.' || std::toupper(C) == 'P'; + }); + + // not a decimal floating-point literal + return std::none_of( + Begin, End, [](char C) { return C == '.' || std::toupper(C) == 'E'; }); +} + +bool IntegralLiteralExpressionMatcher::advance() { + ++Current; + return Current != End; +} + +bool IntegralLiteralExpressionMatcher::consume(tok::TokenKind Kind) { + if (Current->is(Kind)) { + ++Current; + return true; + } + + return false; +} + +bool IntegralLiteralExpressionMatcher::nonTerminalChainedExpr( + bool (IntegralLiteralExpressionMatcher::*NonTerminal)(), + const std::function &IsKind) { + if (!(this->*NonTerminal)()) + return false; + if (Current == End) + return true; + + while (Current != End) { + if (!IsKind(*Current)) + break; + + if (!advance()) + return false; + + if (!(this->*NonTerminal)()) + return false; + } + + return true; +} + +// Advance over unary operators +bool IntegralLiteralExpressionMatcher::unaryOperator() { + if (Current->isOneOf(tok::TokenKind::minus, tok::TokenKind::plus, + tok::TokenKind::tilde, tok::TokenKind::exclaim)) { + return advance(); + } + + return true; +} + +bool IntegralLiteralExpressionMatcher::unaryExpr() { + if (!unaryOperator()) + return false; + + if (consume(tok::TokenKind::l_paren)) { + if (Current == End) + return false; + + if (!expr()) + return false; + + if (Current == End) + return false; + + return consume(tok::TokenKind::r_paren); + } + + if (!Current->isLiteral() || isStringLiteral(Current->getKind()) || + !isIntegralConstant(*Current)) { + return false; + } + ++Current; + return true; +} + +bool IntegralLiteralExpressionMatcher::multiplicativeExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::unaryExpr); +} + +bool IntegralLiteralExpressionMatcher::additiveExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::multiplicativeExpr); +} + +bool IntegralLiteralExpressionMatcher::shiftExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::additiveExpr); +} + +bool IntegralLiteralExpressionMatcher::compareExpr() { + if (!shiftExpr()) + return false; + if (Current == End) + return true; + + if (Current->is(tok::TokenKind::spaceship)) { + if (!advance()) + return false; + + if (!shiftExpr()) + return false; + } + + return true; +} + +bool IntegralLiteralExpressionMatcher::relationalExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::compareExpr); +} + +bool IntegralLiteralExpressionMatcher::equalityExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::relationalExpr); +} + +bool IntegralLiteralExpressionMatcher::andExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::equalityExpr); +} + +bool IntegralLiteralExpressionMatcher::exclusiveOrExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::andExpr); +} + +bool IntegralLiteralExpressionMatcher::inclusiveOrExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::exclusiveOrExpr); +} + +bool IntegralLiteralExpressionMatcher::logicalAndExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::inclusiveOrExpr); +} + +bool IntegralLiteralExpressionMatcher::logicalOrExpr() { + return nonTerminalChainedExpr( + &IntegralLiteralExpressionMatcher::logicalAndExpr); +} + +bool IntegralLiteralExpressionMatcher::conditionalExpr() { + if (!logicalOrExpr()) + return false; + if (Current == End) + return true; + + if (Current->is(tok::TokenKind::question)) { + if (!advance()) + return false; + + if (!expr()) + return false; + if (Current == End) + return false; + + if (!Current->is(tok::TokenKind::colon)) + return false; + + if (!advance()) + return false; + + if (!expr()) + return false; + } + return true; +} + +bool IntegralLiteralExpressionMatcher::expr() { return conditionalExpr(); } + +bool IntegralLiteralExpressionMatcher::match() { + return expr() && Current == End; +} + +} // namespace modernize +} // namespace tidy +} // namespace clang Index: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp =================================================================== --- clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp +++ clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "MacroToEnumCheck.h" +#include "IntegralLiteralExpressionMatcher.h" + #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Preprocessor.h" @@ -73,25 +75,6 @@ return true; } -// Validate that this literal token is a valid integer literal. A literal token -// could be a floating-point token, which isn't acceptable as a value for an -// enumeration. A floating-point token must either have a decimal point or an -// exponent ('E' or 'P'). -static bool isIntegralConstant(const Token &Token) { - const char *Begin = Token.getLiteralData(); - const char *End = Begin + Token.getLength(); - - // not a hexadecimal floating-point literal - if (Token.getLength() > 2 && Begin[0] == '0' && std::toupper(Begin[1]) == 'X') - return std::none_of(Begin + 2, End, [](char C) { - return C == '.' || std::toupper(C) == 'P'; - }); - - // not a decimal floating-point literal - return std::none_of( - Begin, End, [](char C) { return C == '.' || std::toupper(C) == 'E'; }); -} - static StringRef getTokenName(const Token &Tok) { return Tok.is(tok::raw_identifier) ? Tok.getRawIdentifier() : Tok.getIdentifierInfo()->getName(); @@ -232,6 +215,7 @@ void issueDiagnostics(); void warnMacroEnum(const EnumMacro &Macro) const; void fixEnumMacro(const MacroList &MacroList) const; + bool isInitializer(ArrayRef MacroTokens); MacroToEnumCheck *Check; const LangOptions &LangOpts; @@ -335,6 +319,14 @@ CurrentFile = &Files.back(); } +bool MacroToEnumCallbacks::isInitializer(ArrayRef MacroTokens) +{ + IntegralLiteralExpressionMatcher Matcher(MacroTokens); + bool matched = Matcher.match(); + return matched; +} + + // Any defined but rejected macro is scanned for identifiers that // are to be excluded as enums. void MacroToEnumCallbacks::MacroDefined(const Token &MacroNameTok, @@ -360,55 +352,8 @@ return; } - // Return Lit when +Lit, -Lit or ~Lit; otherwise return Unknown. - Token Unknown; - Unknown.setKind(tok::TokenKind::unknown); - auto GetUnopArg = [Unknown](Token First, Token Second) { - return First.isOneOf(tok::TokenKind::minus, tok::TokenKind::plus, - tok::TokenKind::tilde) - ? Second - : Unknown; - }; - - // It could just be a single token. - Token Tok = MacroTokens.front(); - - // It can be any arbitrary nesting of matched parentheses around - // +Lit, -Lit, ~Lit or Lit. - if (MacroTokens.size() > 2) { - // Strip off matching '(', ..., ')' token pairs. - size_t Begin = 0; - size_t End = MacroTokens.size() - 1; - assert(End >= 2U); - for (; Begin < MacroTokens.size() / 2; ++Begin, --End) { - if (!MacroTokens[Begin].is(tok::TokenKind::l_paren) || - !MacroTokens[End].is(tok::TokenKind::r_paren)) - break; - } - size_t Size = End >= Begin ? (End - Begin + 1U) : 0U; - - // It was a single token inside matching parens. - if (Size == 1) - Tok = MacroTokens[Begin]; - else if (Size == 2) - // It can be +Lit, -Lit or ~Lit. - Tok = GetUnopArg(MacroTokens[Begin], MacroTokens[End]); - else { - // Zero or too many tokens after we stripped matching parens. - rememberExpressionTokens(MacroTokens); - return; - } - } else if (MacroTokens.size() == 2) { - // It can be +Lit, -Lit, or ~Lit. - Tok = GetUnopArg(MacroTokens.front(), MacroTokens.back()); - } - - if (!Tok.isLiteral() || isStringLiteral(Tok.getKind()) || - !isIntegralConstant(Tok)) { - if (Tok.isAnyIdentifier()) - rememberExpressionName(Tok); + if (!isInitializer(MacroTokens)) return; - } if (!isConsecutiveMacro(MD)) newEnum(); Index: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp =================================================================== --- clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp +++ clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp @@ -11,6 +11,47 @@ #endif +// Macros expanding to expressions involving only literals are converted. +#define EXPR1 1 - 1 +#define EXPR2 1 + 1 +#define EXPR3 1 * 1 +#define EXPR4 1 / 1 +#define EXPR5 1 | 1 +#define EXPR6 1 & 1 +#define EXPR7 1 << 1 +#define EXPR8 1 >> 1 +#define EXPR9 1 % 2 +#define EXPR10 1 ^ 1 +#define EXPR11 (1 + (2)) +#define EXPR12 ((1) + (2 + 0) + (1 * 1) + (1 / 1) + (1 | 1 ) + (1 & 1) + (1 << 1) + (1 >> 1) + (1 % 2) + (1 ^ 1)) +// CHECK-MESSAGES: :[[@LINE-12]]:1: warning: replace macro with enum [modernize-macro-to-enum] +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR1' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR2' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR3' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR4' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR5' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR6' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR7' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR8' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR9' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR10' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR11' defines an integral constant; prefer an enum instead +// CHECK-MESSAGES: :[[@LINE-13]]:9: warning: macro 'EXPR12' defines an integral constant; prefer an enum instead +// CHECK-FIXES: enum { +// CHECK-FIXES-NEXT: EXPR1 = 1 - 1, +// CHECK-FIXES-NEXT: EXPR2 = 1 + 1, +// CHECK-FIXES-NEXT: EXPR3 = 1 * 1, +// CHECK-FIXES-NEXT: EXPR4 = 1 / 1, +// CHECK-FIXES-NEXT: EXPR5 = 1 | 1, +// CHECK-FIXES-NEXT: EXPR6 = 1 & 1, +// CHECK-FIXES-NEXT: EXPR7 = 1 << 1, +// CHECK-FIXES-NEXT: EXPR8 = 1 >> 1, +// CHECK-FIXES-NEXT: EXPR9 = 1 % 2, +// CHECK-FIXES-NEXT: EXPR10 = 1 ^ 1, +// CHECK-FIXES-NEXT: EXPR11 = (1 + (2)), +// CHECK-FIXES-NEXT: EXPR12 = ((1) + (2 + 0) + (1 * 1) + (1 / 1) + (1 | 1 ) + (1 & 1) + (1 << 1) + (1 >> 1) + (1 % 2) + (1 ^ 1)) +// CHECK-FIXES-NEXT: }; + #define RED 0xFF0000 #define GREEN 0x00FF00 #define BLUE 0x0000FF @@ -329,7 +370,7 @@ #define INSIDE9 9 bool fn() { - #define INSIDE10 10 +#define INSIDE10 10 return INSIDE9 > 1 || INSIDE10 < N; } Index: clang-tools-extra/unittests/clang-tidy/CMakeLists.txt =================================================================== --- clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -25,6 +25,7 @@ GlobListTest.cpp GoogleModuleTest.cpp LLVMModuleTest.cpp + ModernizeModuleTest.cpp NamespaceAliaserTest.cpp ObjCModuleTest.cpp OptionsProviderTest.cpp @@ -53,6 +54,7 @@ clangTidyAndroidModule clangTidyGoogleModule clangTidyLLVMModule + clangTidyModernizeModule clangTidyObjCModule clangTidyReadabilityModule clangTidyUtils Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp =================================================================== --- /dev/null +++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp @@ -0,0 +1,200 @@ +#include "ClangTidyTest.h" +#include "modernize/IntegralLiteralExpressionMatcher.h" +#include "clang/Lex/Lexer.h" +#include "gtest/gtest.h" + +#include +#include +#include +#include + +namespace clang { +namespace tidy { +namespace test { + +static std::vector tokenify(const char *Text) { + LangOptions LangOpts; + std::vector Includes; + LangOptions::setLangDefaults(LangOpts, Language::CXX, llvm::Triple(), + Includes, LangStandard::lang_cxx20); + Lexer Lex(SourceLocation{}, LangOpts, Text, Text, Text + std::strlen(Text)); + std::vector Tokens; + bool End = false; + while (!End) { + Token Tok; + End = Lex.LexFromRawLexer(Tok); + Tokens.push_back(Tok); + } + + return Tokens; +} + +static bool matchText(const char *Text) { + std::vector Tokens{tokenify(Text)}; + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens); + + return Matcher.match(); +} + +namespace { + +struct Param { + bool Matched; + const char *Text; +}; + +class MatcherTest : public ::testing::TestWithParam {}; + +} // namespace + +static const Param Params[] = { + // valid integral literals + {true, "1"}, + {true, "0177"}, + {true, "0xdeadbeef"}, + {true, "0b1011"}, + {true, "'c'"}, + // invalid literals + {false, "1.23"}, + {false, "0x1p3"}, + {false, R"("string")"}, + + // literals with unary operators + {true, "-1"}, + {true, "+1"}, + {true, "~1"}, + {true, "!1"}, + // invalid unary operators + {false, "1-"}, + {false, "1+"}, + {false, "1~"}, + {false, "1!"}, + + // valid binary operators + {true, "1+1"}, + {true, "1-1"}, + {true, "1*1"}, + {true, "1/1"}, + {true, "1%2"}, + {true, "1<<1"}, + {true, "1>>1"}, + {true, "1<=>1"}, + {true, "1<1"}, + {true, "1>1"}, + {true, "1<=1"}, + {true, "1>=1"}, + {true, "1==1"}, + {true, "1!=1"}, + {true, "1&1"}, + {true, "1^1"}, + {true, "1|1"}, + {true, "1&&1"}, + {true, "1||1"}, + // invalid binary operator + {false, "1+"}, + {false, "1-"}, + {false, "1*"}, + {false, "1/"}, + {false, "1%"}, + {false, "1<<"}, + {false, "1>>"}, + {false, "1<=>"}, + {false, "1<"}, + {false, "1>"}, + {false, "1<="}, + {false, "1>="}, + {false, "1=="}, + {false, "1!="}, + {false, "1&"}, + {false, "1^"}, + {false, "1|"}, + {false, "1&&"}, + {false, "1||"}, + + // valid ternary operator + {true, "1?1:1"}, + // invalid ternary operator + {false, "?"}, + {false, "?1"}, + {false, "?:"}, + {false, "?:1"}, + {false, "?1:"}, + {false, "?1:1"}, + {false, "1?"}, + {false, "1?1"}, + {false, "1?:"}, + {false, "1?1:"}, + {false, "1?:1"}, + + // parenthesized expressions + {true, "(1)"}, + {true, "((+1))"}, + {true, "((+(1)))"}, + {true, "(-1)"}, + {true, "-(1)"}, + {true, "(+1)"}, + {true, "((+1))"}, + {true, "+(1)"}, + {true, "(~1)"}, + {true, "~(1)"}, + {true, "(!1)"}, + {true, "!(1)"}, + {true, "(1+1)"}, + {true, "(1-1)"}, + {true, "(1*1)"}, + {true, "(1/1)"}, + {true, "(1%2)"}, + {true, "(1<<1)"}, + {true, "(1>>1)"}, + {true, "(1<=>1)"}, + {true, "(1<1)"}, + {true, "(1>1)"}, + {true, "(1<=1)"}, + {true, "(1>=1)"}, + {true, "(1==1)"}, + {true, "(1!=1)"}, + {true, "(1&1)"}, + {true, "(1^1)"}, + {true, "(1|1)"}, + {true, "(1&&1)"}, + {true, "(1||1)"}, + {true, "(1?1:1)"}, + + // more complicated expressions + {true, "1+1+1"}, + {true, "1+1+1+1"}, + {true, "1+1+1+1+1"}, + {true, "1*1*1"}, + {true, "1*1*1*1"}, + {true, "1*1*1*1*1"}, + {true, "1<<1<<1"}, + {true, "4U>>1>>1"}, + {true, "1<1<1"}, + {true, "1>1>1"}, + {true, "1<=1<=1"}, + {true, "1>=1>=1"}, + {true, "1==1==1"}, + {true, "1!=1!=1"}, + {true, "1&1&1"}, + {true, "1^1^1"}, + {true, "1|1|1"}, + {true, "1&&1&&1"}, + {true, "1||1||1"}, +}; + +TEST_P(MatcherTest, MatchResult) { + EXPECT_TRUE(matchText(GetParam().Text) == GetParam().Matched); +} + +INSTANTIATE_TEST_SUITE_P(TokenExpressionParserTests, MatcherTest, + ::testing::ValuesIn(Params)); + +} // namespace test +} // namespace tidy +} // namespace clang + +std::ostream &operator<<(std::ostream &str, + const clang::tidy::test::Param &value) { + return str << "Matched: " << std::boolalpha << value.Matched << ", Text: '" + << value.Text << "'"; +}