Index: clang-tidy/misc/SuspiciousMissingCommaCheck.h =================================================================== --- clang-tidy/misc/SuspiciousMissingCommaCheck.h +++ clang-tidy/misc/SuspiciousMissingCommaCheck.h @@ -32,6 +32,8 @@ const unsigned SizeThreshold; // Maximal threshold ratio of suspicious string literals to be considered. const double RatioThreshold; + // Maximal number of concatenated tokens. + const unsigned MaxConcatenatedTokens; }; } // namespace misc Index: clang-tidy/misc/SuspiciousMissingCommaCheck.cpp =================================================================== --- clang-tidy/misc/SuspiciousMissingCommaCheck.cpp +++ clang-tidy/misc/SuspiciousMissingCommaCheck.cpp @@ -19,8 +19,51 @@ namespace { -AST_MATCHER(StringLiteral, isConcatenatedLiteral) { - return Node.getNumConcatenated() > 1; +bool isConcatenatedLiteralsOnPurpose(ASTContext* Ctx, + const StringLiteral* Lit) { + // String literals surrounded by parentheses are assumed to be on purpose. + // i.e.: const char* Array[] = { ("a" "b" "c"), "d", [...] }; + auto Parents = Ctx->getParents(*Lit); + if (Parents.size() == 1 && Parents[0].get() != nullptr) + return true; + + // Appropriately indented string literals are assumed to be on purpose. + // The following frequent indentation is accepted: + // const char* Array[] = { + // "first literal" + // "indented literal" + // "indented literal", + // "second literal", + // [...] + // }; + const SourceManager& SM = Ctx->getSourceManager(); + bool IndentedCorrectly = true; + SourceLocation FirstToken = Lit->getStrTokenLoc(0); + FileID BaseFID = SM.getFileID(FirstToken); + unsigned int BaseIndent = SM.getSpellingColumnNumber(FirstToken); + unsigned int BaseLine = SM.getSpellingLineNumber(FirstToken); + for (unsigned int TokNum = 1; TokNum < Lit->getNumConcatenated(); ++ TokNum) { + SourceLocation Token = Lit->getStrTokenLoc(TokNum); + FileID FID = SM.getFileID(Token); + unsigned int Indent = SM.getSpellingColumnNumber(Token); + unsigned int Line = SM.getSpellingLineNumber(Token); + if (FID != BaseFID || Line != BaseLine + TokNum || Indent <= BaseIndent) { + IndentedCorrectly = false; + break; + } + } + if (IndentedCorrectly) + return true; + + // There is no pattern recognized by the checker, assume it's not on purpose. + return false; +} + +AST_MATCHER_P(StringLiteral, isConcatenatedLiteral, + unsigned, MaxConcatenatedTokens) { + return Node.getNumConcatenated() > 1 && + Node.getNumConcatenated() < MaxConcatenatedTokens && + !isConcatenatedLiteralsOnPurpose(&Finder->getASTContext(), &Node); } } // namespace @@ -29,17 +72,19 @@ StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), SizeThreshold(Options.get("SizeThreshold", 5U)), - RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))) {} + RatioThreshold(std::stod(Options.get("RatioThreshold", ".2"))), + MaxConcatenatedTokens(Options.get("MaxConcatenatedTokens", 5U)) {} void SuspiciousMissingCommaCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "SizeThreshold", SizeThreshold); Options.store(Opts, "RatioThreshold", std::to_string(RatioThreshold)); + Options.store(Opts, "MaxConcatenatedTokens", MaxConcatenatedTokens); } void SuspiciousMissingCommaCheck::registerMatchers(MatchFinder *Finder) { const auto ConcatenatedStringLiteral = - stringLiteral(isConcatenatedLiteral()).bind("str"); + stringLiteral(isConcatenatedLiteral(MaxConcatenatedTokens)).bind("str"); const auto StringsInitializerList = initListExpr(hasType(constantArrayType()), @@ -51,9 +96,10 @@ void SuspiciousMissingCommaCheck::check( const MatchFinder::MatchResult &Result) { const auto *InitializerList = Result.Nodes.getNodeAs("list"); - const auto *ConcatenatedLiteral = Result.Nodes.getNodeAs("str"); + const auto *ConcatenatedLiteral = + Result.Nodes.getNodeAs("str"); assert(InitializerList && ConcatenatedLiteral); - + // Skip small arrays as they often generate false-positive. unsigned int Size = InitializerList->getNumInits(); if (Size < SizeThreshold) return; Index: test/clang-tidy/misc-suspicious-missing-comma.cpp =================================================================== --- test/clang-tidy/misc-suspicious-missing-comma.cpp +++ test/clang-tidy/misc-suspicious-missing-comma.cpp @@ -15,7 +15,8 @@ L"Red", L"Yellow", L"Blue", L"Green", L"Purple", L"Rose", L"White", L"Black" }; -// The following array should not trigger any warnings. +// The following array should not trigger any warnings. There is more than 5 +// elements, but they are all concatenated string literals. const char* HttpCommands[] = { "GET / HTTP/1.0\r\n" "\r\n", @@ -26,9 +27,56 @@ "GET /favicon.ico HTTP/1.0\r\n" "header: dummy" "\r\n", + + "GET /index.html-en HTTP/1.0\r\n" + "\r\n", + + "GET /index.html-fr HTTP/1.0\r\n" + "\r\n", + + "GET /index.html-es HTTP/1.0\r\n" + "\r\n", }; // This array is too small to trigger a warning. const char* SmallArray[] = { "a" "b", "c" }; + +// Parentheses should be enough to avoid warnings. +const char* ParentheseArray[] = { + ("a" "b"), "c", + ("d" + "e" + "f"), + "g", "h", "i", "j", "k", "l" +}; + +// Indentation should be enough to avoid warnings. +const char* CorrectlyIndentedArray[] = { + "This is a long message " + "which is spanning over multiple lines." + "And this should be fine.", + "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l" +}; + +const char* IncorrectlyIndentedArray[] = { + "This is a long message " + "which is spanning over multiple lines." + "And this should be fine.", + "a", "b", "c", "d", "e", "f", + "g", "h", "i", "j", "k", "l" +}; +// CHECK-MESSAGES: :[[@LINE-6]]:3: warning: suspicious string literal, probably missing a comma [misc-suspicious-missing-comma] + +const char* TooManyConcatenatedTokensArray[] = { + "Dummy line", + "Dummy line", + "a" "b" "c" "d" "e" "f", + "g" "h" "i" "j" "k" "l", + "Dummy line", + "Dummy line", + "Dummy line", + "Dummy line", +};