diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -82,6 +82,9 @@
   cxx_import_decl,
   cxx_export_module_decl,
   cxx_export_import_decl,
+  /// Indicates that there are tokens present between the last scanned directive
+  /// and eof. The \p Directive::Tokens array will be empty for this kind.
+  tokens_present_before_eof,
   pp_eof,
 };
 
@@ -123,6 +126,9 @@
 /// \param Directives The previously scanned dependency
 /// directives.
 /// \param OS the stream to print the dependency directives on.
+/// \param PrintMarkerForTokensBeforeEOF if true also prints
+/// \p dependency_directives_scan::tokens_present_before_eof, otherwise this
+/// directive will be ignored.
 ///
 /// This is used primarily for testing purposes, during dependency scanning the
 /// \p Lexer uses the tokens directly, not their printed version.
diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp
--- a/clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -87,6 +87,9 @@
   dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
                                                         const char *const End);
 
+  void skipLine(const char *&First, const char *const End);
+  void skipDirective(StringRef Name, const char *&First, const char *const End);
+
   /// Lexes next token and if it is identifier returns its string, otherwise
   /// it skips the current line and returns \p None.
   ///
@@ -150,6 +153,7 @@
   DiagnosticsEngine *Diags;
   SourceLocation InputSourceLoc;
 
+  const char *LastTokenPtr = nullptr;
   /// Keeps track of the tokens for the currently lexed directive. Once a
   /// directive is fully lexed and "committed" then the tokens get appended to
   /// \p Tokens and \p CurDirToks is cleared for the next directive.
@@ -364,7 +368,7 @@
   return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
 }
 
-static void skipLine(const char *&First, const char *const End) {
+void Scanner::skipLine(const char *&First, const char *const End) {
   for (;;) {
     assert(First <= End);
     if (First == End)
@@ -379,6 +383,7 @@
       // Iterate over strings correctly to avoid comments and newlines.
       if (*First == '"' ||
           (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
+        LastTokenPtr = First;
         if (isRawStringLiteral(Start, First))
           skipRawString(First, End);
         else
@@ -388,6 +393,7 @@
 
       // Iterate over comments correctly.
       if (*First != '/' || End - First < 2) {
+        LastTokenPtr = First;
         ++First;
         continue;
       }
@@ -399,6 +405,7 @@
       }
 
       if (First[1] != '*') {
+        LastTokenPtr = First;
         ++First;
         continue;
       }
@@ -416,8 +423,8 @@
   }
 }
 
-static void skipDirective(StringRef Name, const char *&First,
-                          const char *const End) {
+void Scanner::skipDirective(StringRef Name, const char *&First,
+                            const char *const End) {
   if (llvm::StringSwitch<bool>(Name)
           .Case("warning", true)
           .Case("error", true)
@@ -710,6 +717,8 @@
     return false;
   }
 
+  LastTokenPtr = First;
+
   TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);
 
   auto ScEx1 = make_scope_exit([&]() {
@@ -803,6 +812,9 @@
 
   if (!Error) {
     // Add an EOF on success.
+    if (LastTokenPtr &&
+        (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
+      pushDirective(tokens_present_before_eof);
     pushDirective(pp_eof);
   }
 
@@ -851,6 +863,8 @@
   };
 
   for (const dependency_directives_scan::Directive &Directive : Directives) {
+    if (Directive.Kind == tokens_present_before_eof)
+      OS << "<TokBeforeEOF>";
     Optional<tok::TokenKind> PrevTokenKind;
     for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
       if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -4323,6 +4323,8 @@
   while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
     if (DepDirectives.front().Kind == pp_eof)
       return LexEndOfFile(Result, BufferEnd);
+    if (DepDirectives.front().Kind == tokens_present_before_eof)
+      MIOpt.ReadToken();
     NextDepDirectiveTokenIndex = 0;
     DepDirectives = DepDirectives.drop_front();
   }
@@ -4398,6 +4400,7 @@
     case cxx_import_decl:
     case cxx_export_module_decl:
     case cxx_export_import_decl:
+    case tokens_present_before_eof:
       break;
     case pp_if:
     case pp_ifdef:
diff --git a/clang/unittests/Lex/CMakeLists.txt b/clang/unittests/Lex/CMakeLists.txt
--- a/clang/unittests/Lex/CMakeLists.txt
+++ b/clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPDependencyDirectivesTest.cpp
   PPMemoryAllocationsTest.cpp
   )
 
@@ -19,4 +20,6 @@
   clangLex
   clangParse
   clangSema
+
+  LLVMTestingSupport
   )
diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
--- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -57,10 +57,11 @@
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
                                                     Directives));
-  EXPECT_TRUE(Out.empty());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
   EXPECT_TRUE(Tokens.empty());
-  ASSERT_EQ(1u, Directives.size());
-  ASSERT_EQ(pp_eof, Directives.back().Kind);
+  ASSERT_EQ(2u, Directives.size());
+  EXPECT_EQ(tokens_present_before_eof, Directives[0].Kind);
+  EXPECT_EQ(pp_eof, Directives[1].Kind);
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
@@ -451,7 +452,7 @@
   SmallVector<char, 128> Out;
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma push_macro(\"MACRO\")\n", Out));
@@ -470,15 +471,15 @@
   EXPECT_STREQ("#pragma include_alias(<A>, <B>)\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma clang module impor\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma clang module import\n", Out));
@@ -663,7 +664,7 @@
            "#error \\\n#include <t.h>\n",
        }) {
     ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-    EXPECT_STREQ("", Out.data());
+    EXPECT_STREQ("<TokBeforeEOF>\n", Out.data());
   }
 
   for (auto Source : {
@@ -767,7 +768,8 @@
 )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
   EXPECT_STREQ(
-      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n",
+      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n"
+      "<TokBeforeEOF>\n",
       Out.data());
 
   Source = R"(#if NEVER_ENABLED
@@ -778,7 +780,8 @@
 )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
   EXPECT_STREQ(
-      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n",
+      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n"
+      "<TokBeforeEOF>\n",
       Out.data());
 }
 
@@ -799,11 +802,11 @@
 
   StringRef Source = "#define X '\\ \t\nx'\nvoid foo() {}";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X '\\ \t\nx'\n", Out.data());
+  EXPECT_STREQ("#define X '\\ \t\nx'\n<TokBeforeEOF>\n", Out.data());
 
   Source = "#define X \"\\ \r\nx\"\nvoid foo() {}";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X \"\\ \r\nx\"\n", Out.data());
+  EXPECT_STREQ("#define X \"\\ \r\nx\"\n<TokBeforeEOF>\n", Out.data());
 
   Source = "#define X \"\\ \r\nx\n#include <x>\n";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
@@ -848,11 +851,56 @@
                "exp\\\nort import:l[[rename]];"
                "import<<=3;import a b d e d e f e;"
                "import foo[[no_unique_address]];import foo();"
-               "import f(:sefse);import f(->a=3);\n",
+               "import f(:sefse);import f(->a=3);"
+               "<TokBeforeEOF>\n",
                Out.data());
-  ASSERT_EQ(Directives.size(), 10u);
+  ASSERT_EQ(Directives.size(), 11u);
   EXPECT_EQ(Directives[0].Kind, pp_include);
   EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl);
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) {
+  SmallString<128> Out;
+
+  StringRef Source = R"(
+    #define A
+    #ifdef B
+    int x;
+    #endif
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#define A\n<TokBeforeEOF>\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif // some comment
+
+    // other comment
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif /* some comment
+
+    */
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif /* some comment
+
+    */
+    int x;
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBeforeEOF>\n", Out.data());
+}
+
 } // end anonymous namespace
diff --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
new file mode 100644
--- /dev/null
+++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -0,0 +1,148 @@
+//===- unittests/Lex/PPDependencyDirectivesTest.cpp -------------------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+// The test fixture.
+class PPDependencyDirectivesTest : public ::testing::Test {
+protected:
+  PPDependencyDirectivesTest()
+      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+    TargetOpts->Triple = "x86_64-apple-macos12";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+};
+
+class IncludeCollector : public PPCallbacks {
+public:
+  Preprocessor &PP;
+  SmallVectorImpl<StringRef> &IncludedFiles;
+
+  IncludeCollector(Preprocessor &PP, SmallVectorImpl<StringRef> &IncludedFiles)
+      : PP(PP), IncludedFiles(IncludedFiles) {}
+
+  void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
+                        SrcMgr::CharacteristicKind FileType, FileID PrevFID,
+                        SourceLocation Loc) override {
+    if (Reason != LexedFileChangeReason::EnterFile)
+      return;
+    if (FID == PP.getPredefinesFileID())
+      return;
+    StringRef Filename =
+        PP.getSourceManager().getSLocEntry(FID).getFile().getName();
+    IncludedFiles.push_back(Filename);
+  }
+};
+
+TEST_F(PPDependencyDirectivesTest, MacroGuard) {
+  // "head1.h" has a macro guard and should only be included once.
+  // "head2.h" and "head3.h" have tokens following the macro check, they should
+  // be included multiple times.
+
+  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  VFS->addFile(
+      "head1.h", 0,
+      llvm::MemoryBuffer::getMemBuffer("#ifndef H1_H\n#define H1_H\n#endif\n"));
+  VFS->addFile(
+      "head2.h", 0,
+      llvm::MemoryBuffer::getMemBuffer("#ifndef H2_H\n#define H2_H\n#endif\n\n"
+                                       "extern int foo;\n"));
+  VFS->addFile("head3.h", 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   "#ifndef H3_H\n#define H3_H\n#endif\n\n"
+                   "#ifdef SOMEMAC\nextern int foo;\n#endif\n"));
+  VFS->addFile("main.c", 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   "#include \"head1.h\"\n#include \"head1.h\"\n"
+                   "#include \"head2.h\"\n#include \"head2.h\"\n"
+                   "#include \"head3.h\"\n#include \"head3.h\"\n"));
+  FileMgr.setVirtualFileSystem(VFS);
+
+  Optional<FileEntryRef> FE;
+  ASSERT_THAT_ERROR(FileMgr.getFileRef("main.c").moveInto(FE),
+                    llvm::Succeeded());
+  SourceMgr.setMainFileID(
+      SourceMgr.createFileID(*FE, SourceLocation(), SrcMgr::C_User));
+
+  struct DepDirectives {
+    SmallVector<dependency_directives_scan::Token> Tokens;
+    SmallVector<dependency_directives_scan::Directive> Directives;
+  };
+  SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects;
+
+  auto getDependencyDirectives = [&](FileEntryRef File)
+      -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+    DepDirectivesObjects.push_back(std::make_unique<DepDirectives>());
+    StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer();
+    bool Err = scanSourceForDependencyDirectives(
+        Input, DepDirectivesObjects.back()->Tokens,
+        DepDirectivesObjects.back()->Directives);
+    EXPECT_FALSE(Err);
+    return llvm::makeArrayRef(DepDirectivesObjects.back()->Directives);
+  };
+
+  auto PPOpts = std::make_shared<PreprocessorOptions>();
+  PPOpts->DependencyDirectivesForFile = [&](FileEntryRef File)
+      -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+    return getDependencyDirectives(File);
+  };
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, Target.get());
+  Preprocessor PP(PPOpts, Diags, LangOpts, SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+
+  SmallVector<StringRef> IncludedFiles;
+  PP.addPPCallbacks(std::make_unique<IncludeCollector>(PP, IncludedFiles));
+  PP.EnterMainSourceFile();
+  while (true) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+  }
+
+  SmallVector<StringRef> ExpectedIncludes{
+      "main.c", "./head1.h", "./head2.h", "./head2.h", "./head3.h", "./head3.h",
+  };
+  EXPECT_EQ(IncludedFiles, ExpectedIncludes);
+}
+
+} // anonymous namespace