Index: clang-tidy/modernize/CMakeLists.txt =================================================================== --- clang-tidy/modernize/CMakeLists.txt +++ clang-tidy/modernize/CMakeLists.txt @@ -7,6 +7,7 @@ MakeUniqueCheck.cpp ModernizeTidyModule.cpp PassByValueCheck.cpp + RawStringLiteralCheck.cpp RedundantVoidArgCheck.cpp ReplaceAutoPtrCheck.cpp ShrinkToFitCheck.cpp Index: clang-tidy/modernize/ModernizeTidyModule.cpp =================================================================== --- clang-tidy/modernize/ModernizeTidyModule.cpp +++ clang-tidy/modernize/ModernizeTidyModule.cpp @@ -14,6 +14,7 @@ #include "LoopConvertCheck.h" #include "MakeUniqueCheck.h" #include "PassByValueCheck.h" +#include "RawStringLiteralCheck.h" #include "RedundantVoidArgCheck.h" #include "ReplaceAutoPtrCheck.h" #include "ShrinkToFitCheck.h" @@ -36,6 +37,8 @@ CheckFactories.registerCheck<LoopConvertCheck>("modernize-loop-convert"); CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique"); CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value"); + CheckFactories.registerCheck<RawStringLiteralCheck>( + "modernize-raw-string-literal"); CheckFactories.registerCheck<RedundantVoidArgCheck>( "modernize-redundant-void-arg"); CheckFactories.registerCheck<ReplaceAutoPtrCheck>( Index: clang-tidy/modernize/RawStringLiteralCheck.h =================================================================== --- /dev/null +++ clang-tidy/modernize/RawStringLiteralCheck.h @@ -0,0 +1,45 @@ +//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H + +#include "../ClangTidy.h" +#include <string> + +namespace clang { +namespace tidy { +namespace modernize { + +/// Replaces string literals containing escaped characters with raw string +/// literals. The "DelimiterStem" option seeds the raw-string delimiter. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-raw-string-literal.html +class RawStringLiteralCheck : public ClangTidyCheck { +public: + RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context); + + void storeOptions(ClangTidyOptions::OptionMap &Options) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + +private: + void replaceWithRawStringLiteral( + const ast_matchers::MatchFinder::MatchResult &Result, + const StringLiteral *Literal); + + std::string DelimiterStem; +}; + +} // namespace modernize +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H Index: clang-tidy/modernize/RawStringLiteralCheck.cpp =================================================================== --- /dev/null +++ clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -0,0 +1,140 @@ +//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "RawStringLiteralCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace modernize { + +namespace { + +bool containsEscapes(StringRef HayStack, StringRef Escapes) { + size_t BackSlash = HayStack.find('\\'); + if (BackSlash == StringRef::npos) + return false; + + while (BackSlash != StringRef::npos) { + if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos) + return false; + BackSlash = HayStack.find('\\', BackSlash + 2); + } + + return true; +} + +bool isRawStringLiteral(StringRef Text) { + // Already a raw string literal if R comes before ". + const size_t QuotePos = Text.find('"'); + assert(QuotePos != StringRef::npos); + return (QuotePos > 0) && (Text[QuotePos - 1] == 'R'); +} + +bool containsEscapedCharacters(const MatchFinder::MatchResult &Result, + const StringLiteral *Literal) { + // FIXME: Handle L"", u8"", u"" and U"" literals. 
+ if (!Literal->isAscii()) + return false; + + StringRef Bytes = Literal->getBytes(); + // Non-printing characters disqualify this literal: + // \007 = \a bell + // \010 = \b backspace + // \011 = \t horizontal tab + // \012 = \n new line + // \013 = \v vertical tab + // \014 = \f form feed + // \015 = \r carriage return + // \177 = delete + if (Bytes.find_first_of(StringRef("\000\001\002\003\004\005\006\a" + "\b\t\n\v\f\r\016\017" + "\020\021\022\023\024\025\026\027" + "\030\031\032\033\034\035\036\037" + "\177", + 33)) != StringRef::npos) + return false; + + CharSourceRange CharRange = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(Literal->getSourceRange()), + *Result.SourceManager, Result.Context->getLangOpts()); + StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager, + Result.Context->getLangOpts()); + if (Text.empty() || isRawStringLiteral(Text)) // no text or already raw + return false; + + return containsEscapes(Text, R"('\"?x01)"); +} + +bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) { + return Bytes.find(Delimiter.empty() + ? std::string(R"lit()")lit") + : (")" + Delimiter + R"(")")) != StringRef::npos; +} + +std::string asRawStringLiteral(const StringLiteral *Literal, + const std::string &DelimiterStem) { + const StringRef Bytes = Literal->getBytes(); + std::string Delimiter; + for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) { + Delimiter = (I == 0) ? 
DelimiterStem : DelimiterStem + std::to_string(I); + } + + if (Delimiter.empty()) + return (R"(R"()" + Bytes + R"lit()")lit").str(); + + return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str(); +} + +} // namespace + +RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + DelimiterStem(Options.get("DelimiterStem", "lit")) {} + +void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "DelimiterStem", DelimiterStem); +} + +void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(stringLiteral().bind("lit"), this); +} + +void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) { + // Raw string literals require C++11 or later. + if (!Result.Context->getLangOpts().CPlusPlus11) + return; + + const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit"); + if (Literal->getLocStart().isMacroID()) + return; + + if (containsEscapedCharacters(Result, Literal)) + replaceWithRawStringLiteral(Result, Literal); +} + +void RawStringLiteralCheck::replaceWithRawStringLiteral( + const MatchFinder::MatchResult &Result, const StringLiteral *Literal) { + CharSourceRange CharRange = Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(Literal->getSourceRange()), + *Result.SourceManager, Result.Context->getLangOpts()); + diag(Literal->getLocStart(), + "escaped string literal can be written as a raw string literal") + << FixItHint::CreateReplacement( + CharRange, asRawStringLiteral(Literal, DelimiterStem)); +} + +} // namespace modernize +} // namespace tidy +} // namespace clang Index: docs/ReleaseNotes.rst =================================================================== --- docs/ReleaseNotes.rst +++ docs/ReleaseNotes.rst @@ -63,7 +63,10 @@ explain them more clearly, and provide more accurate fix-its for the issues identified. The improvements since the 3.8 release include: -- ... 
+- New modernize-raw-string-literal check + + This check selectively replaces string literals containing escaped + characters with raw string literals. Improvements to ``modularize`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: docs/clang-tidy/checks/list.rst =================================================================== --- docs/clang-tidy/checks/list.rst +++ docs/clang-tidy/checks/list.rst @@ -78,6 +78,7 @@ modernize-loop-convert modernize-make-unique modernize-pass-by-value + modernize-raw-string-literal modernize-redundant-void-arg modernize-replace-auto-ptr modernize-shrink-to-fit Index: docs/clang-tidy/checks/modernize-raw-string-literal.rst =================================================================== --- /dev/null +++ docs/clang-tidy/checks/modernize-raw-string-literal.rst @@ -0,0 +1,46 @@ +.. title:: clang-tidy - modernize-raw-string-literal + +modernize-raw-string-literal +============================ + +This check selectively replaces string literals containing escaped characters +with raw string literals. + +Example: + +.. code-block:: c++ + + const char *const Quotes{"embedded \"quotes\""}; + const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"}; + const char *const SingleLine{"Single line.\n"}; + const char *const TrailingSpace{"Look here -> \n"}; + const char *const Tab{"One\tTwo\n"}; + const char *const Bell{"Hello!\a And welcome!"}; + const char *const Path{"C:\\Program Files\\Vendor\\Application.exe"}; + const char *const RegEx{"\\w\\([a-z]\\)"}; + +becomes + +.. 
code-block:: c++ + + const char *const Quotes{R"(embedded "quotes")"}; + const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"}; + const char *const SingleLine{"Single line.\n"}; + const char *const TrailingSpace{"Look here -> \n"}; + const char *const Tab{"One\tTwo\n"}; + const char *const Bell{"Hello!\a And welcome!"}; + const char *const Path{R"(C:\Program Files\Vendor\Application.exe)"}; + const char *const RegEx{R"(\w\([a-z]\))"}; + +The presence of any of the following escapes can cause the string to be +converted to a raw string literal: ``\\``, ``\'``, ``\"``, ``\?``, +and octal or hexadecimal escapes for printable ASCII characters. + +A string literal containing only escaped newlines is a common way of +writing lines of text output. Introducing physical newlines with raw +string literals in this case is likely to impede readability. These +string literals are left unchanged. + +An escaped horizontal tab, form feed, or vertical tab prevents the string +literal from being converted. The presence of a horizontal tab, form feed or +vertical tab in source code is not visually obvious. 
Index: test/clang-tidy/modernize-raw-string-literal-delimiter.cpp =================================================================== --- /dev/null +++ test/clang-tidy/modernize-raw-string-literal-delimiter.cpp @@ -0,0 +1,9 @@ +// RUN: %check_clang_tidy %s modernize-raw-string-literal %t -- -config='{CheckOptions: [{key: "modernize-raw-string-literal.DelimiterStem", value: "str"}]}' -- -std=c++11 + +char const *const ContainsSentinel{"who\\ops)\""}; +// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"str(who\ops)")str"};{{$}} + +//char const *const ContainsDelim{"whoops)\")lit\""}; +// CHECK-XMESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal +// CHECK-XFIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}} Index: test/clang-tidy/modernize-raw-string-literal.cpp =================================================================== --- /dev/null +++ test/clang-tidy/modernize-raw-string-literal.cpp @@ -0,0 +1,123 @@ +// RUN: %check_clang_tidy %s modernize-raw-string-literal %t + +char const *const BackSlash("goink\\frob"); +// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: escaped string literal can be written as a raw string literal [modernize-raw-string-literal] +// CHECK-FIXES: {{^}}char const *const BackSlash(R"(goink\frob)");{{$}} + +char const *const PlainLiteral("plain literal"); + +// Non-printable ASCII characters. 
+char const *const Nul("goink\\\000"); +char const *const Soh("goink\\\001"); +char const *const Stx("goink\\\002"); +char const *const Etx("goink\\\003"); +char const *const Enq("goink\\\004"); +char const *const Ack("goink\\\005"); +char const *const Bell("goink\\\afrob"); +char const *const BackSpace("goink\\\bfrob"); +char const *const HorizontalTab("goink\\\tfrob"); +char const *const NewLine("goink\nfrob"); +char const *const VerticalTab("goink\\\vfrob"); +char const *const FormFeed("goink\\\ffrob"); +char const *const CarraigeReturn("goink\\\rfrob"); +char const *const So("goink\\\016"); +char const *const Si("goink\\\017"); +char const *const Dle("goink\\\020"); +char const *const Dc1("goink\\\021"); +char const *const Dc2("goink\\\022"); +char const *const Dc3("goink\\\023"); +char const *const Dc4("goink\\\024"); +char const *const Nak("goink\\\025"); +char const *const Syn("goink\\\026"); +char const *const Etb("goink\\\027"); +char const *const Can("goink\\\030"); +char const *const Em("goink\\\031"); +char const *const Sub("goink\\\032"); +char const *const Esc("goink\\\033"); +char const *const Fs("goink\\\034"); +char const *const Gs("goink\\\035"); +char const *const Rs("goink\\\036"); +char const *const Us("goink\\\037"); +char const *const HexNonPrintable("\\\x03"); +char const *const Delete("\\\177"); + +char const *const TrailingSpace("A line \\with space. 
\n"); +char const *const TrailingNewLine("A single \\line.\n"); +char const *const AlreadyRaw(R"(foobie\\bletch)"); +char const *const UTF8Literal(u8"foobie\\bletch"); +char const *const UTF8RawLiteral(u8R"(foobie\\bletch)"); +char16_t const *const UTF16Literal(u"foobie\\bletch"); +char16_t const *const UTF16RawLiteral(uR"(foobie\\bletch)"); +char32_t const *const UTF32Literal(U"foobie\\bletch"); +char32_t const *const UTF32RawLiteral(UR"(foobie\\bletch)"); +wchar_t const *const WideLiteral(L"foobie\\bletch"); +wchar_t const *const WideRawLiteral(LR"(foobie\\bletch)"); + +char const *const SingleQuote("goink\'frob"); +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal +// CHECK-XFIXES: {{^}}char const *const SingleQuote(R"(goink'frob)");{{$}} + +char const *const DoubleQuote("goink\"frob"); +// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const DoubleQuote(R"(goink"frob)");{{$}} + +char const *const QuestionMark("goink\?frob"); +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const QuestionMark(R"(goink?frob)");{{$}} + +char const *const RegEx("goink\\(one|two\\)\\\\\\?.*\\nfrob"); +// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const RegEx(R"(goink\(one|two\)\\\?.*\nfrob)");{{$}} + +char const *const Path("C:\\Program Files\\Vendor\\Application\\Application.exe"); +// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const Path(R"(C:\Program Files\Vendor\Application\Application.exe)");{{$}} + +char const *const ContainsSentinel("who\\ops)\""); +// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const ContainsSentinel(R"lit(who\ops)")lit");{{$}} + +char 
const *const ContainsDelim("whoops)\")lit\""); +// CHECK-MESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const ContainsDelim(R"lit1(whoops)")lit")lit1");{{$}} + +char const *const OctalPrintable("\100\\"); +// CHECK-MESSAGES: :[[@LINE-1]]:34: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const OctalPrintable(R"(@\)");{{$}} + +char const *const HexPrintable("\x40\\"); +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal +// CHECK-FIXES: {{^}}char const *const HexPrintable(R"(@\)");{{$}} + +#define TRICK(arg_) #arg_ +char const *const MacroBody = TRICK(foo\\bar); + +#define HAT(rabbit_) #rabbit_ "foo\\bar" +char const *const StringizedMacroArgument = HAT(foo\\bar); + +#define SUBST(lit_) lit_ +char const *const MacroArgument = SUBST("foo\\bar"); +// FIXME: We should be able to replace this string literal macro argument + +template <typename T> +void fn(char const *const Arg) { + char const *const Str("foo\\bar"); + // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal + // CHECK-FIXES: {{^}} char const *const Str(R"(foo\bar)");{{$}} +} + +template <> +void fn<int>(char const *const Arg) { + char const *const Str("foo\\bar"); + // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal + // CHECK-FIXES: {{^}} char const *const Str(R"(foo\bar)");{{$}} +} + +void callFn() { + fn<int>("foo\\bar"); + // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: {{.*}} can be written as a raw string literal + // CHECK-FIXES: {{^}} fn<int>(R"(foo\bar)");{{$}} + fn<double>("foo\\bar"); + // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: {{.*}} can be written as a raw string literal + // CHECK-FIXES: {{^}} fn<double>(R"(foo\bar)");{{$}} +}