diff --git a/clang/include/clang/Lex/Pragma.h b/clang/include/clang/Lex/Pragma.h --- a/clang/include/clang/Lex/Pragma.h +++ b/clang/include/clang/Lex/Pragma.h @@ -123,6 +123,13 @@ PragmaNamespace *getIfNamespace() override { return this; } }; +/// Destringize a \c _Pragma("") string according to C11 6.10.9.1: +/// "The string literal is destringized by deleting any encoding prefix, +/// deleting the leading and trailing double-quotes, replacing each escape +/// sequence \" by a double-quote, and replacing each escape sequence \\ by a +/// single backslash." +void prepare_PragmaString(SmallVectorImpl &StrVal); + } // namespace clang #endif // LLVM_CLANG_LEX_PRAGMA_H diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -19,6 +19,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/Lexer.h" +#include "clang/Lex/Pragma.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" @@ -72,6 +73,8 @@ // Set the lexer to use 'tok::at' for '@', instead of 'tok::unknown'. LangOpts.ObjC = true; LangOpts.LineComment = true; + // FIXME: we do not enable C11 or C++11, so we are missing u/u8/U"" and + // R"()" literals. return LangOpts; } @@ -91,6 +94,10 @@ void skipLine(const char *&First, const char *const End); void skipDirective(StringRef Name, const char *&First, const char *const End); + /// Returns the spelling of a string literal or identifier after performing + /// any processing needed to handle \c clang::Token::NeedsCleaning. + StringRef cleanStringIfNeeded(const dependency_directives_scan::Token &Tok); + /// Lexes next token and if it is identifier returns its string, otherwise /// it skips the current line and returns \p std::nullopt. /// @@ -112,6 +119,22 @@ const char *&First, const char *const End); + /// Lexes next token and returns true iff it matches the kind \p K. + /// Otherwise it skips the current line and returns false. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. + [[nodiscard]] bool isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, + const char *const End); + + /// Lexes next token and if it is string literal, returns its string. + /// Otherwise, it skips the current line and returns \p std::nullopt. + /// + /// In any case (whatever the token kind) \p First and the \p Lexer will + /// advance beyond the token. + [[nodiscard]] std::optional + tryLexStringLiteralOrSkipLine(const char *&First, const char *const End); + [[nodiscard]] bool scanImpl(const char *First, const char *const End); [[nodiscard]] bool lexPPLine(const char *&First, const char *const End); [[nodiscard]] bool lexAt(const char *&First, const char *const End); @@ -119,6 +142,7 @@ [[nodiscard]] bool lexDefine(const char *HashLoc, const char *&First, const char *const End); [[nodiscard]] bool lexPragma(const char *&First, const char *const End); + [[nodiscard]] bool lex_Pragma(const char *&First, const char *const End); [[nodiscard]] bool lexEndif(const char *&First, const char *const End); [[nodiscard]] bool lexDefault(DirectiveKind Kind, const char *&First, const char *const End); @@ -525,15 +549,8 @@ } } -[[nodiscard]] std::optional -Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { - const dependency_directives_scan::Token &Tok = lexToken(First, End); - if (Tok.isNot(tok::raw_identifier)) { - if (!Tok.is(tok::eod)) - skipLine(First, End); - return std::nullopt; - } - +StringRef +Scanner::cleanStringIfNeeded(const dependency_directives_scan::Token &Tok) { bool NeedsCleaning = Tok.Flags & clang::Token::NeedsCleaning; if (LLVM_LIKELY(!NeedsCleaning)) return Input.slice(Tok.Offset, Tok.getEnd()); @@ -541,6 +558,9 @@ SmallString<64> Spelling; Spelling.resize(Tok.Length); + // FIXME: C++11 raw string literals need special handling (see getSpellingSlow + // in the Lexer). Currently we cannot see them due to our LangOpts. + unsigned SpellingLength = 0; const char *BufPtr = Input.begin() + Tok.Offset; const char *AfterIdent = Input.begin() + Tok.getEnd(); @@ -555,6 +575,18 @@ .first->first(); } +std::optional +Scanner::tryLexIdentifierOrSkipLine(const char *&First, const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.isNot(tok::raw_identifier)) { + if (!Tok.is(tok::eod)) + skipLine(First, End); + return std::nullopt; + } + + return cleanStringIfNeeded(Tok); +} + StringRef Scanner::lexIdentifier(const char *&First, const char *const End) { std::optional Id = tryLexIdentifierOrSkipLine(First, End); assert(Id && "expected identifier token"); @@ -572,6 +604,28 @@ return false; } +bool Scanner::isNextTokenOrSkipLine(tok::TokenKind K, const char *&First, + const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (Tok.is(K)) + return true; + skipLine(First, End); + return false; +} + +std::optional +Scanner::tryLexStringLiteralOrSkipLine(const char *&First, + const char *const End) { + const dependency_directives_scan::Token &Tok = lexToken(First, End); + if (!tok::isStringLiteral(Tok.Kind)) { + if (!Tok.is(tok::eod)) + skipLine(First, End); + return std::nullopt; + } + + return cleanStringIfNeeded(Tok); +} + bool Scanner::lexAt(const char *&First, const char *const End) { // Handle "@import". @@ -629,6 +683,41 @@ return lexModuleDirectiveBody(Kind, First, End); } +bool Scanner::lex_Pragma(const char *&First, const char *const End) { + if (!isNextTokenOrSkipLine(tok::l_paren, First, End)) + return false; + + std::optional Str = tryLexStringLiteralOrSkipLine(First, End); + + if (!Str || !isNextTokenOrSkipLine(tok::r_paren, First, End)) + return false; + + SmallString<64> Buffer(*Str); + prepare_PragmaString(Buffer); + + // Use a new scanner instance since the tokens will be inside the allocated + // string. We should already have captured all the relevant tokens in the + // current scanner. + SmallVector DiscardTokens; + const char *Begin = Buffer.c_str(); + Scanner PragmaScanner{StringRef(Begin, Buffer.size()), DiscardTokens, Diags, + InputSourceLoc}; + + PragmaScanner.TheLexer.setParsingPreprocessorDirective(true); + if (PragmaScanner.lexPragma(Begin, Buffer.end())) + return true; + + DirectiveKind K = PragmaScanner.topDirective(); + if (K == pp_none) { + skipLine(First, End); + return false; + } + + assert(Begin == Buffer.end()); + pushDirective(K); + return false; +} + bool Scanner::lexPragma(const char *&First, const char *const End) { std::optional FoundId = tryLexIdentifierOrSkipLine(First, End); if (!FoundId) @@ -713,6 +802,7 @@ case 'i': case 'e': case 'm': + case '_': return true; } return false; @@ -749,6 +839,12 @@ if (*First == 'i' || *First == 'e' || *First == 'm') return lexModule(First, End); + if (*First == '_') { + if (isNextIdentifierOrSkipLine("_Pragma", First, End)) + return lex_Pragma(First, End); + return false; + } + // Handle preprocessing directives. TheLexer.setParsingPreprocessorDirective(true); diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -262,17 +262,48 @@ SourceLocation RParenLoc = Tok.getLocation(); bool Invalid = false; - std::string StrVal = getSpelling(StrTok, &Invalid); + SmallString<64> StrVal; + StrVal.resize(StrTok.getLength()); + StringRef StrValRef = getSpelling(StrTok, StrVal, &Invalid); if (Invalid) { Diag(PragmaLoc, diag::err__Pragma_malformed); return; } - // The _Pragma is lexically sound. Destringize according to C11 6.10.9.1: - // "The string literal is destringized by deleting any encoding prefix, - // deleting the leading and trailing double-quotes, replacing each escape - // sequence \" by a double-quote, and replacing each escape sequence \\ by a - // single backslash." + assert(StrValRef.size() <= StrVal.size()); + + // If the token was spelled somewhere else, copy it. + if (StrValRef.begin() != StrVal.begin()) + StrVal.assign(StrValRef); + // Truncate if necessary. + else if (StrValRef.size() != StrVal.size()) + StrVal.resize(StrValRef.size()); + + // The _Pragma is lexically sound. Destringize according to C11 6.10.9.1. + prepare_PragmaString(StrVal); + + // Plop the string (including the newline and trailing null) into a buffer + // where we can lex it. + Token TmpTok; + TmpTok.startToken(); + CreateString(StrVal, TmpTok); + SourceLocation TokLoc = TmpTok.getLocation(); + + // Make and enter a lexer object so that we lex and expand the tokens just + // like any others. + Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc, + StrVal.size(), *this); + + EnterSourceFileWithLexer(TL, nullptr); + + // With everything set up, lex this as a #pragma directive. + HandlePragmaDirective({PIK__Pragma, PragmaLoc}); + + // Finally, return whatever came after the pragma directive. + return Lex(Tok); +} + +void clang::prepare_PragmaString(SmallVectorImpl &StrVal) { if (StrVal[0] == 'L' || StrVal[0] == 'U' || (StrVal[0] == 'u' && StrVal[1] != '8')) StrVal.erase(StrVal.begin()); @@ -296,8 +327,8 @@ // Remove 'R " d-char-sequence' and 'd-char-sequence "'. We'll replace the // parens below. - StrVal.erase(0, 2 + NumDChars); - StrVal.erase(StrVal.size() - 1 - NumDChars); + StrVal.erase(StrVal.begin(), StrVal.begin() + 2 + NumDChars); + StrVal.erase(StrVal.end() - 1 - NumDChars, StrVal.end()); } else { assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && "Invalid string token!"); @@ -319,27 +350,7 @@ StrVal[0] = ' '; // Replace the terminating quote with a \n. - StrVal[StrVal.size()-1] = '\n'; - - // Plop the string (including the newline and trailing null) into a buffer - // where we can lex it. - Token TmpTok; - TmpTok.startToken(); - CreateString(StrVal, TmpTok); - SourceLocation TokLoc = TmpTok.getLocation(); - - // Make and enter a lexer object so that we lex and expand the tokens just - // like any others. - Lexer *TL = Lexer::Create_PragmaLexer(TokLoc, PragmaLoc, RParenLoc, - StrVal.size(), *this); - - EnterSourceFileWithLexer(TL, nullptr); - - // With everything set up, lex this as a #pragma directive. - HandlePragmaDirective({PIK__Pragma, PragmaLoc}); - - // Finally, return whatever came after the pragma directive. - return Lex(Tok); + StrVal[StrVal.size() - 1] = '\n'; } /// HandleMicrosoft__pragma - Like Handle_Pragma except the pragma text diff --git a/clang/test/ClangScanDeps/_Pragma-once.c b/clang/test/ClangScanDeps/_Pragma-once.c new file mode 100644 --- /dev/null +++ b/clang/test/ClangScanDeps/_Pragma-once.c @@ -0,0 +1,24 @@ +// Test scanning deps works with _Pragma syntax when not inside a macro. + +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed "s|DIR|%/t|g" %t/cdb.json.template > %t/cdb.json + +// RUN: clang-scan-deps -compilation-database %t/cdb.json -j 1 + +//--- cdb.json.template +[{ + "directory": "DIR", + "command": "clang -fsyntax-only DIR/tu.c", + "file": "DIR/tu.c" +}] + +//--- a.h +_Pragma("once") +#include "b.h" + +//--- b.h +#include "a.h" + +//--- tu.c +#include "a.h" diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -503,6 +503,92 @@ EXPECT_STREQ("#pragma clang module import\n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, UnderscorePragma) { + SmallVector Out; + + ASSERT_FALSE(minimizeSourceToDependencyDirectives(R"(_)", Out)); + EXPECT_STREQ("\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(R"(_Pragma)", Out)); + EXPECT_STREQ("\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(R"(_Pragma()", Out)); + EXPECT_STREQ("\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(R"(_Pragma())", Out)); + EXPECT_STREQ("\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(R"(_Pragma(")", Out)); + EXPECT_STREQ("\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives(R"(_Pragma("A"))", Out)); + EXPECT_STREQ("\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"x(_Pragma("push_macro(\"MACRO\")"))x", Out)); + EXPECT_STREQ(R"x(_Pragma("push_macro(\"MACRO\")"))x" + "\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"x(_Pragma("pop_macro(\"MACRO\")"))x", Out)); + EXPECT_STREQ(R"x(_Pragma("pop_macro(\"MACRO\")"))x" + "\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"x(_Pragma("include_alias(\"A\", \"B\")"))x", Out)); + EXPECT_STREQ(R"x(_Pragma("include_alias(\"A\", \"B\")"))x" + "\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"x(_Pragma("include_alias(, )"))x", Out)); + EXPECT_STREQ(R"x(_Pragma("include_alias(, )"))x" + "\n", + Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(R"(_Pragma("clang"))", Out)); + EXPECT_STREQ("\n", Out.data()); + + ASSERT_FALSE( + minimizeSourceToDependencyDirectives(R"(_Pragma("clang module"))", Out)); + EXPECT_STREQ("\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"(_Pragma("clang module impor"))", Out)); + EXPECT_STREQ("\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"(_Pragma("clang module import"))", Out)); + EXPECT_STREQ(R"(_Pragma("clang module import"))" + "\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"(_Pragma("clang \ + module \ + import"))", + Out)); + EXPECT_STREQ(R"(_Pragma("clang \ + module \ + import"))" + "\n", + Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"(_Pragma(L"clang module import"))", Out)); + EXPECT_STREQ(R"(_Pragma(L"clang module import"))" + "\n", + Out.data()); + + // FIXME: u"" strings depend on using C11 language mode + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"(_Pragma(u"clang module import"))", Out)); + EXPECT_STREQ("\n", Out.data()); + + // FIXME: R"()" strings depend on using C++ 11 language mode + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + R"(_Pragma(R"abc(clang module import)abc"))", Out)); + EXPECT_STREQ("\n", Out.data()); +} + TEST(MinimizeSourceToDependencyDirectivesTest, Include) { SmallVector Out; @@ -757,20 +843,26 @@ #pragma once // another comment #include +_Pragma("once") )"; ASSERT_FALSE( minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); - EXPECT_STREQ("#pragma once\n#include \n", Out.data()); - ASSERT_EQ(Directives.size(), 3u); + EXPECT_STREQ("#pragma once\n#include \n_Pragma(\"once\")\n", + Out.data()); + ASSERT_EQ(Directives.size(), 4u); EXPECT_EQ(Directives[0].Kind, dependency_directives_scan::pp_pragma_once); + EXPECT_EQ(Directives[2].Kind, dependency_directives_scan::pp_pragma_once); Source = R"(// comment #pragma once extra tokens // another comment #include + _Pragma("once") extra tokens )"; ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); - EXPECT_STREQ("#pragma once extra tokens\n#include \n", Out.data()); + EXPECT_STREQ("#pragma once extra tokens\n#include " + "\n_Pragma(\"once\")\n", + Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest,