Index: clang-tidy/modernize/CMakeLists.txt
===================================================================
--- clang-tidy/modernize/CMakeLists.txt
+++ clang-tidy/modernize/CMakeLists.txt
@@ -6,6 +6,7 @@
   MakeUniqueCheck.cpp
   ModernizeTidyModule.cpp
   PassByValueCheck.cpp
+  RawStringLiteralCheck.cpp
   RedundantVoidArgCheck.cpp
   ReplaceAutoPtrCheck.cpp
   ShrinkToFitCheck.cpp
Index: clang-tidy/modernize/ModernizeTidyModule.cpp
===================================================================
--- clang-tidy/modernize/ModernizeTidyModule.cpp
+++ clang-tidy/modernize/ModernizeTidyModule.cpp
@@ -13,6 +13,7 @@
 #include "LoopConvertCheck.h"
 #include "MakeUniqueCheck.h"
 #include "PassByValueCheck.h"
+#include "RawStringLiteralCheck.h"
 #include "RedundantVoidArgCheck.h"
 #include "ReplaceAutoPtrCheck.h"
 #include "ShrinkToFitCheck.h"
@@ -33,6 +34,8 @@
     CheckFactories.registerCheck<LoopConvertCheck>("modernize-loop-convert");
     CheckFactories.registerCheck<MakeUniqueCheck>("modernize-make-unique");
     CheckFactories.registerCheck<PassByValueCheck>("modernize-pass-by-value");
+    CheckFactories.registerCheck<RawStringLiteralCheck>(
+        "modernize-raw-string-literal");
     CheckFactories.registerCheck<RedundantVoidArgCheck>(
         "modernize-redundant-void-arg");
     CheckFactories.registerCheck<ReplaceAutoPtrCheck>(
Index: clang-tidy/modernize/RawStringLiteralCheck.h
===================================================================
--- /dev/null
+++ clang-tidy/modernize/RawStringLiteralCheck.h
@@ -0,0 +1,45 @@
+//===--- RawStringLiteralCheck.h - clang-tidy--------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
+
+#include "../ClangTidy.h"
+#include <string>
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+/// This check replaces string literals with escaped characters to
+/// raw string literals.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/modernize-raw-string-literal.html
+class RawStringLiteralCheck : public ClangTidyCheck {
+public:
+  RawStringLiteralCheck(StringRef Name, ClangTidyContext *Context);
+
+  void storeOptions(ClangTidyOptions::OptionMap &Options) override;
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+  void replaceWithRawStringLiteral(
+      const ast_matchers::MatchFinder::MatchResult &Result,
+      const StringLiteral *Literal);
+
+  std::string DelimiterStem; // Value of the "DelimiterStem" check option.
+};
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MODERNIZE_RAW_STRING_LITERAL_H
Index: clang-tidy/modernize/RawStringLiteralCheck.cpp
===================================================================
--- /dev/null
+++ clang-tidy/modernize/RawStringLiteralCheck.cpp
@@ -0,0 +1,149 @@
+//===--- RawStringLiteralCheck.cpp - clang-tidy----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RawStringLiteralCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace modernize {
+
+namespace {
+
+/// Returns true if \p HayStack contains at least one backslash and the
+/// character following every backslash-introduced escape is listed in
+/// \p Escapes.
+bool containsEscapes(StringRef HayStack, StringRef Escapes) {
+  size_t BackSlash = HayStack.find('\\');
+  if (BackSlash == StringRef::npos)
+    return false;
+
+  while (BackSlash != StringRef::npos) {
+    // A trailing backslash has no following character to classify.
+    if (BackSlash + 1 >= HayStack.size() ||
+        Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos)
+      return false;
+    BackSlash = HayStack.find('\\', BackSlash + 2);
+  }
+
+  return true;
+}
+
+/// Returns true if the source spelling \p Text is already a raw string
+/// literal, i.e. an R immediately precedes the first double quote.
+bool isRawStringLiteral(StringRef Text) {
+  // Tolerate spellings without a quote (for instance an empty Text) rather
+  // than indexing out of bounds.
+  const size_t QuotePos = Text.find('"');
+  return QuotePos != StringRef::npos && QuotePos > 0 &&
+         Text[QuotePos - 1] == 'R';
+}
+
+/// Returns true if \p Literal passes isAscii(), is not already raw, consists
+/// solely of printable ASCII bytes, and its source spelling contains
+/// escapes, all of which a raw string literal can spell verbatim.
+bool containsEscapedCharacters(const MatchFinder::MatchResult &Result,
+                               const StringLiteral *Literal) {
+  // FIXME: Handle L"", u8"", u"" and U"" literals.
+  if (!Literal->isAscii())
+    return false;
+
+  // Control characters (\000-\037 and \177, which includes \a, \b, \t, \n,
+  // \v, \f and \r) disqualify the literal.  Bytes above \177 are rejected
+  // too: spelled raw they would put non-ASCII bytes into the source file.
+  for (const unsigned char C : Literal->getBytes())
+    if (C < 0x20 || C >= 0x7F)
+      return false;
+
+  CharSourceRange CharRange = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(Literal->getSourceRange()),
+      *Result.SourceManager, Result.Context->getLangOpts());
+  StringRef Text = Lexer::getSourceText(CharRange, *Result.SourceManager,
+                                        Result.Context->getLangOpts());
+  if (isRawStringLiteral(Text))
+    return false;
+
+  return containsEscapes(Text, R"('\"?x01)");
+}
+
+/// Returns true if \p Bytes contains the sequence that would close a raw
+/// string literal using \p Delimiter: )" when the delimiter is empty,
+/// otherwise ) followed by the delimiter and a double quote.
+bool containsDelimiter(StringRef Bytes, const std::string &Delimiter) {
+  return Bytes.find(Delimiter.empty()
+                        ? std::string{R"lit()")lit"}
+                        : (")" + Delimiter + R"(")")) != StringRef::npos;
+}
+
+/// Spells the contents of \p Literal as a raw string literal.  No delimiter
+/// is used unless the contents contain )"; in that case \p DelimiterStem,
+/// then \p DelimiterStem suffixed with 1, 2, ..., is tried until the closing
+/// sequence no longer occurs in the contents.
+std::string asRawStringLiteral(const StringLiteral *Literal,
+                               const std::string &DelimiterStem) {
+  const StringRef Bytes = Literal->getBytes();
+  std::string Delimiter;
+  for (int I = 0; containsDelimiter(Bytes, Delimiter); ++I) {
+    Delimiter = (I == 0) ? DelimiterStem : DelimiterStem + std::to_string(I);
+  }
+
+  if (Delimiter.empty())
+    return (R"(R"()" + Bytes + R"lit()")lit").str();
+
+  return (R"(R")" + Delimiter + "(" + Bytes + ")" + Delimiter + R"(")").str();
+}
+
+} // namespace
+
+RawStringLiteralCheck::RawStringLiteralCheck(StringRef Name,
+                                             ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context),
+      DelimiterStem{Options.get("DelimiterStem", "lit")} {}
+
+void RawStringLiteralCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
+  ClangTidyCheck::storeOptions(Opts);
+  // Persist the option read in the constructor; the parameter is named Opts
+  // so that it does not hide the Options member used here.
+  Options.store(Opts, "DelimiterStem", DelimiterStem);
+}
+
+void RawStringLiteralCheck::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(stringLiteral().bind("lit"), this);
+}
+
+void RawStringLiteralCheck::check(const MatchFinder::MatchResult &Result) {
+  // Raw string literals require C++11 or later.
+  if (!Result.Context->getLangOpts().CPlusPlus11)
+    return;
+
+  const auto *Literal = Result.Nodes.getNodeAs<StringLiteral>("lit");
+  if (Literal->getLocStart().isMacroID())
+    return;
+
+  if (containsEscapedCharacters(Result, Literal))
+    replaceWithRawStringLiteral(Result, Literal);
+}
+
+void RawStringLiteralCheck::replaceWithRawStringLiteral(
+    const MatchFinder::MatchResult &Result, const StringLiteral *Literal) {
+  CharSourceRange CharRange = Lexer::makeFileCharRange(
+      CharSourceRange::getTokenRange(Literal->getSourceRange()),
+      *Result.SourceManager, Result.Context->getLangOpts());
+  diag(Literal->getLocStart(),
+       "escaped string literal can be written as a raw string literal")
+      << FixItHint::CreateReplacement(
+          CharRange, asRawStringLiteral(Literal, DelimiterStem));
+}
+
+} // namespace modernize
+} // namespace tidy
+} // namespace clang
Index: docs/clang-tidy/checks/list.rst
===================================================================
--- docs/clang-tidy/checks/list.rst
+++ docs/clang-tidy/checks/list.rst
@@ -73,6 +73,7 @@
    modernize-loop-convert
    modernize-make-unique
    modernize-pass-by-value
+   modernize-raw-string-literal
    modernize-redundant-void-arg
modernize-replace-auto-ptr modernize-shrink-to-fit Index: docs/clang-tidy/checks/modernize-raw-string-literal.rst =================================================================== --- /dev/null +++ docs/clang-tidy/checks/modernize-raw-string-literal.rst @@ -0,0 +1,47 @@ +.. title:: clang-tidy - modernize-raw-string-literal + +modernize-raw-string-literal +============================ + +This check selectively replaces string literals containing escaped characters +with raw string literals. + +Example: + +.. code-block:: c++ + + const char *const Quotes{"embedded \"quotes\""}; + const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"}; + const char *const SingleLine{"Single line.\n"}; + const char *const TrailingSpace{"Look here -> \n"}; + const char *const Tab{"One\tTwo\n"}; + const char *const Bell{"Hello!\a And welcome!"}; + const char *const Path{"C:\\Program Files\\Vendor\\Application.exe"}; + const char *const RegEx{"\\w\\([a-z]\\)"}; + +becomes + +.. code-block:: c++ + + const char *const Quotes{R"(embedded "quotes")"}; + const char *const Paragraph{"Line one.\nLine two.\nLine three.\n"}; + const char *const SingleLine{"Single line.\n"}; + const char *const TrailingSpace{"Look here -> \n"}; + const char *const Tab{"One\tTwo\n"}; + const char *const Bell{"Hello!\a And welcome!"}; + const char *const Path{R"(C:\Program Files\Vendor\Application.exe)"}; + const char *const RegEx{R"(\w\([a-z]\))"}; + +The presence of any of the following escapes can cause the string to be +converted to a raw string literal: ``\\``, ``\'``, ``\"``, ``\?``, +and octal or hexadecimal escapes for printable ASCII characters. + +A string literal containing only escaped newlines is a common way of +writing lines of text output. Introducing physical newlines with raw +string literals in this case is likely to impede readability. These +string literals are left unchanged. 
+
+An escaped horizontal tab, form feed, or vertical tab prevents the string
+literal from being converted. Unlike a physical newline, the presence of a
+horizontal tab, form feed or vertical tab in source code is not visually
+obvious.
Index: test/clang-tidy/modernize-raw-string-literal-delimiter.cpp
===================================================================
--- /dev/null
+++ test/clang-tidy/modernize-raw-string-literal-delimiter.cpp
@@ -0,0 +1,9 @@
+// RUN: %check_clang_tidy %s modernize-raw-string-literal %t -- -config='{CheckOptions: [{key: "modernize-raw-string-literal.DelimiterStem", value: "str"}]}' -- -std=c++11
+
+char const *const ContainsSentinel{"who\\ops)\""};
+// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"str(who\ops)")str"};{{$}}
+
+//char const *const ContainsDelim{"whoops)\")lit\""};
+// CHECK-XMESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
+// CHECK-XFIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}}
Index: test/clang-tidy/modernize-raw-string-literal.cpp
===================================================================
--- /dev/null
+++ test/clang-tidy/modernize-raw-string-literal.cpp
@@ -0,0 +1,122 @@
+// RUN: %check_clang_tidy %s modernize-raw-string-literal %t
+
+char const *const BackSlash{"goink\\frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: escaped string literal can be written as a raw string literal [modernize-raw-string-literal]
+// CHECK-FIXES: {{^}}char const *const BackSlash{R"(goink\frob)"};{{$}}
+
+char const *const PlainLiteral("plain literal");
+
+// Non-printable ASCII characters.
+char const *const Nul{"goink\\\000"};
+char const *const Soh{"goink\\\001"};
+char const *const Stx{"goink\\\002"};
+char const *const Etx{"goink\\\003"};
+char const *const Enq{"goink\\\004"};
+char const *const Ack{"goink\\\005"};
+char const *const Bell{"goink\\\afrob"};
+char const *const BackSpace{"goink\\\bfrob"};
+char const *const HorizontalTab{"goink\\\tfrob"};
+char const *const NewLine{"goink\nfrob"};
+char const *const VerticalTab{"goink\\\vfrob"};
+char const *const FormFeed{"goink\\\ffrob"};
+char const *const CarriageReturn{"goink\\\rfrob"};
+char const *const So{"goink\\\016"};
+char const *const Si{"goink\\\017"};
+char const *const Dle{"goink\\\020"};
+char const *const Dc1{"goink\\\021"};
+char const *const Dc2{"goink\\\022"};
+char const *const Dc3{"goink\\\023"};
+char const *const Dc4{"goink\\\024"};
+char const *const Nak{"goink\\\025"};
+char const *const Syn{"goink\\\026"};
+char const *const Etb{"goink\\\027"};
+char const *const Can{"goink\\\030"};
+char const *const Em{"goink\\\031"};
+char const *const Sub{"goink\\\032"};
+char const *const Esc{"goink\\\033"};
+char const *const Fs{"goink\\\034"};
+char const *const Gs{"goink\\\035"};
+char const *const Rs{"goink\\\036"};
+char const *const Us{"goink\\\037"};
+char const *const HexNonPrintable{"\\\x03"};
+char const *const Delete{"\\\177"};
+
+char const *const TrailingSpace{"A line \\with space. \n"};
+char const *const TrailingNewLine{"A single \\line.\n"};
+char const *const AlreadyRaw{R"(foobie\\bletch)"};
+char const *const UTF8Literal{u8"foobie\\bletch"};
+char const *const UTF8RawLiteral{u8R"(foobie\\bletch)"};
+char16_t const *const UTF16Literal{u"foobie\\bletch"};
+char16_t const *const UTF16RawLiteral{uR"(foobie\\bletch)"};
+char32_t const *const UTF32Literal{U"foobie\\bletch"};
+char32_t const *const UTF32RawLiteral{UR"(foobie\\bletch)"};
+wchar_t const *const WideLiteral{L"foobie\\bletch"};
+wchar_t const *const WideRawLiteral{LR"(foobie\\bletch)"};
+
+char const *const SingleQuote{"goink\'frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
+// CHECK-XFIXES: {{^}}char const *const SingleQuote{R"(goink'frob)"};{{$}}
+
+char const *const DoubleQuote{"goink\"frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const DoubleQuote{R"(goink"frob)"};{{$}}
+
+char const *const QuestionMark{"goink\?frob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const QuestionMark{R"(goink?frob)"};{{$}}
+
+char const *const RegEx{"goink\\(one|two\\)\\\\\\?.*\\nfrob"};
+// CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const RegEx{R"(goink\(one|two\)\\\?.*\nfrob)"};{{$}}
+
+char const *const Path{"C:\\Program Files\\Vendor\\Application\\Application.exe"};
+// CHECK-MESSAGES: :[[@LINE-1]]:24: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const Path{R"(C:\Program Files\Vendor\Application\Application.exe)"};{{$}}
+
+char const *const ContainsSentinel{"who\\ops)\""};
+// CHECK-MESSAGES: :[[@LINE-1]]:36: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const ContainsSentinel{R"lit(who\ops)")lit"};{{$}}
+
+char const *const ContainsDelim{"whoops)\")lit\""};
+// CHECK-MESSAGES: :[[@LINE-1]]:33: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const ContainsDelim{R"lit1(whoops)")lit")lit1"};{{$}}
+
+char const *const OctalPrintable{"\100\\"};
+// CHECK-MESSAGES: :[[@LINE-1]]:34: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const OctalPrintable{R"(@\)"};{{$}}
+
+char const *const HexPrintable{"\x40\\"};
+// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: {{.*}} can be written as a raw string literal
+// CHECK-FIXES: {{^}}char const *const HexPrintable{R"(@\)"};{{$}}
+
+#define TRICK(arg_) #arg_
+char const *const MacroBody = TRICK(foo\\bar);
+
+#define HAT(rabbit_) #rabbit_ "foo\\bar"
+char const *const StringizedMacroArgument = HAT(foo\\bar);
+
+#define SUBST(lit_) lit_
+char const *const MacroArgument = SUBST("foo\\bar");
+
+template <typename T>
+void fn(char const *const Arg) {
+  char const *const Str{"foo\\bar"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  char const *const Str{R"(foo\bar)"};{{$}}
+}
+
+template <>
+void fn<int>(char const *const Arg) {
+  char const *const Str{"foo\\bar"};
+  // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  char const *const Str{R"(foo\bar)"};{{$}}
+}
+
+void callFn() {
+  fn<int>("foo\\bar");
+  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  fn<int>(R"(foo\bar)");{{$}}
+  fn<double>("foo\\bar");
+  // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: {{.*}} can be written as a raw string literal
+  // CHECK-FIXES: {{^}}  fn<double>(R"(foo\bar)");{{$}}
+}