diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" @@ -106,14 +107,185 @@ const SourceManager *SourceMgr = nullptr; }; -/// Gets the includes in the preamble section of the file by running -/// preprocessor over \p Contents. Returned includes do not contain resolved -/// paths. \p VFS and \p Cmd is used to build the compiler invocation, which -/// might stat/read files. -llvm::Expected> -scanPreambleIncludes(llvm::StringRef Contents, - llvm::IntrusiveRefCntPtr VFS, - const tooling::CompileCommand &Cmd) { +// FIXME: expose from printppoutput +void printMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI, + const Preprocessor &PP, raw_ostream &OS) { + OS << II.getName(); + + if (MI.isFunctionLike()) { + OS << '('; + if (!MI.param_empty()) { + MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end(); + for (; AI + 1 != E; ++AI) { + OS << (*AI)->getName(); + OS << ','; + } + + // Last argument. + if ((*AI)->getName() == "__VA_ARGS__") + OS << "..."; + else + OS << (*AI)->getName(); + } + + if (MI.isGNUVarargs()) + OS << "..."; // #define foo(x...) + + OS << ')'; + } + + // GCC always emits a space, even if the macro body is empty. However, do not + // want to emit two spaces if the first token has a leading space. + if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace()) + OS << ' '; + + SmallString<128> SpellingBuffer; + for (const auto &T : MI.tokens()) { + if (T.hasLeadingSpace()) + OS << ' '; + + OS << PP.getSpelling(T, SpellingBuffer); + } +} + +struct TextualPPDirective { + unsigned DirectiveLine; + // Full text that's representing the directive, including the `#`. + std::string Text; + + bool operator==(const TextualPPDirective &RHS) const { + return std::tie(DirectiveLine, Text) == + std::tie(RHS.DirectiveLine, RHS.Text); + } +}; + +// Collects conditional directives and defines/undefs inside the main file. +struct DirectiveCollector : public PPCallbacks { + DirectiveCollector(const SourceManager &SM, const Preprocessor &PP, + std::vector &TextualDirectives) + : SM(SM), PP(PP), TextualDirectives(TextualDirectives) {} + + void FileChanged(SourceLocation Loc, FileChangeReason Reason, + SrcMgr::CharacteristicKind FileType, + FileID PrevFID) override { + InMainFile = SM.isWrittenInMainFile(Loc); + } + + void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) override { + if (!InMainFile) + return; + std::string Text; + llvm::raw_string_ostream OS(Text); + OS << "#define "; + printMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MD->getMacroInfo(), + PP, OS); + addDirective(MacroNameTok.getLocation(), Text, SM); + } + + void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD, + const MacroDirective *Undef) override { + if (!InMainFile) + return; + addDirective( + MacroNameTok.getLocation(), + llvm::formatv("#undef {0}", MacroNameTok.getIdentifierInfo()->getName()) + .str(), + SM); + } + + void Endif(SourceLocation Loc, SourceLocation IfLoc) override { + if (!InMainFile) + return; + addDirective(Loc, "#endif", SM); + } + + void Else(SourceLocation Loc, SourceLocation IfLoc) override { + if (!InMainFile) + return; + addDirective(Loc, "#else", SM); + } + + void Ifdef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDefinition &MD) override { + if (!InMainFile) + return; + addDirective( + Loc, + llvm::formatv("#ifdef {0}", MacroNameTok.getIdentifierInfo()->getName()) + .str(), + SM); + } + + void Ifndef(SourceLocation Loc, const Token &MacroNameTok, + const MacroDefinition &MD) override { + if (!InMainFile) + return; + addDirective(Loc, + llvm::formatv("#ifndef {0}", + MacroNameTok.getIdentifierInfo()->getName()) + .str(), + SM); + } + + void If(SourceLocation Loc, SourceRange ConditionRange, + ConditionValueKind ConditionValue) override { + if (!InMainFile) + return; + addDirective( + Loc, + llvm::formatv( + "#if {0}", + Lexer::getSourceText(CharSourceRange::getTokenRange(ConditionRange), + SM, PP.getLangOpts())) + .str(), + SM); + } + + void Elif(SourceLocation Loc, SourceRange ConditionRange, + ConditionValueKind ConditionValue, SourceLocation IfLoc) override { + if (!InMainFile) + return; + addDirective( + Loc, + llvm::formatv( + "#elif {0}", + Lexer::getSourceText(CharSourceRange::getTokenRange(ConditionRange), + SM, PP.getLangOpts())) + .str(), + SM); + } + +private: + void addDirective(SourceLocation Loc, llvm::StringRef Text, + const SourceManager &SM) { + // We check on the caller-side to not calculate Text in redundant cases. + assert(InMainFile); + TextualDirectives.emplace_back(); + TextualPPDirective &TD = TextualDirectives.back(); + auto DecompLoc = SM.getDecomposedLoc(Loc); + TD.DirectiveLine = SM.getLineNumber(DecompLoc.first, DecompLoc.second); + TD.Text = Text.str(); + } + + bool InMainFile = true; + const SourceManager &SM; + const Preprocessor &PP; + std::vector &TextualDirectives; +}; + +struct ScannedPreamble { + std::vector Includes; + std::vector TextualDirectives; +}; +/// Scans the preprocessor directives in the preamble section of the file by +/// running preprocessor over \p Contents. Returned includes do not contain +/// resolved paths. \p VFS and \p Cmd is used to build the compiler invocation, +/// which might stat/read files. +llvm::Expected +scanPreamble(llvm::StringRef Contents, + llvm::IntrusiveRefCntPtr VFS, + const tooling::CompileCommand &Cmd) { // Build and run Preprocessor over the preamble. ParseInputs PI; PI.Contents = Contents.str(); @@ -144,14 +316,18 @@ if (!Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0])) return llvm::createStringError(llvm::inconvertibleErrorCode(), "failed BeginSourceFile"); + const auto &SM = Clang->getSourceManager(); Preprocessor &PP = Clang->getPreprocessor(); IncludeStructure Includes; + PP.addPPCallbacks(collectIncludeStructureCallback(SM, &Includes)); + ScannedPreamble SP; PP.addPPCallbacks( - collectIncludeStructureCallback(Clang->getSourceManager(), &Includes)); + std::make_unique(SM, PP, SP.TextualDirectives)); if (llvm::Error Err = Action.Execute()) return std::move(Err); Action.EndSourceFile(); - return Includes.MainFileIncludes; + SP.Includes = std::move(Includes.MainFileIncludes); + return SP; } const char *spellingForIncDirective(tok::PPKeywordKind IncludeDirective) { @@ -278,7 +454,7 @@ const ParseInputs &Modified, const PreambleData &Baseline) { assert(llvm::sys::path::is_absolute(FileName) && "relative FileName!"); - // First scan the include directives in Baseline and Modified. These will be + // First scan preprocessor directives in Baseline and Modified. These will be // used to figure out newly added directives in Modified. Scanning can fail, // the code just bails out and creates an empty patch in such cases, as: // - If scanning for Baseline fails, no knowledge of existing includes hence @@ -286,25 +462,28 @@ // whole preamble, which is terribly slow. // - If scanning for Modified fails, cannot figure out newly added ones so // there's nothing to do but generate an empty patch. - auto BaselineIncludes = scanPreambleIncludes( + auto BaselineScan = scanPreamble( // Contents needs to be null-terminated. Baseline.Preamble.getContents().str(), Baseline.StatCache->getConsumingFS(Modified.FS), Modified.CompileCommand); - if (!BaselineIncludes) { - elog("Failed to scan includes for baseline of {0}: {1}", FileName, - BaselineIncludes.takeError()); + if (!BaselineScan) { + elog("Failed to scan baseline of {0}: {1}", FileName, + BaselineScan.takeError()); return {}; } - auto ModifiedIncludes = scanPreambleIncludes( + auto ModifiedScan = scanPreamble( Modified.Contents, Baseline.StatCache->getConsumingFS(Modified.FS), Modified.CompileCommand); - if (!ModifiedIncludes) { - elog("Failed to scan includes for modified contents of {0}: {1}", FileName, - ModifiedIncludes.takeError()); + if (!ModifiedScan) { + elog("Failed to scan modified contents of {0}: {1}", FileName, + ModifiedScan.takeError()); return {}; } - // No patch needed if includes are equal. - if (*BaselineIncludes == *ModifiedIncludes) + + bool IncludesChanged = BaselineScan->Includes != ModifiedScan->Includes; + bool DirectivesChanged = + BaselineScan->TextualDirectives != ModifiedScan->TextualDirectives; + if (!IncludesChanged && !DirectivesChanged) return PreamblePatch::unmodified(Baseline); PreamblePatch PP; @@ -314,18 +493,6 @@ "__preamble_patch__.h"); PP.PatchFileName = PatchName.str().str(); - // We are only interested in newly added includes, record the ones in Baseline - // for exclusion. - llvm::DenseMap, - /*Resolved=*/llvm::StringRef> - ExistingIncludes; - for (const auto &Inc : Baseline.Includes.MainFileIncludes) - ExistingIncludes[{Inc.Directive, Inc.Written}] = Inc.Resolved; - // There might be includes coming from disabled regions, record these for - // exclusion too. note that we don't have resolved paths for those. - for (const auto &Inc : *BaselineIncludes) - ExistingIncludes.try_emplace({Inc.Directive, Inc.Written}); - // Calculate extra includes that needs to be inserted. llvm::raw_string_ostream Patch(PP.PatchContents); // Set default filename for subsequent #line directives Patch << "#line 0 \""; @@ -333,25 +500,54 @@ // might lead to problems on windows especially. escapeBackslashAndQuotes(FileName, Patch); Patch << "\"\n"; - for (auto &Inc : *ModifiedIncludes) { - auto It = ExistingIncludes.find({Inc.Directive, Inc.Written}); - // Include already present in the baseline preamble. Set resolved path and - // put into preamble includes. - if (It != ExistingIncludes.end()) { - Inc.Resolved = It->second.str(); - PP.PreambleIncludes.push_back(Inc); - continue; + + if (IncludesChanged) { + // We are only interested in newly added includes, record the ones in + // Baseline for exclusion. + llvm::DenseMap, + /*Resolved=*/llvm::StringRef> + ExistingIncludes; + for (const auto &Inc : Baseline.Includes.MainFileIncludes) + ExistingIncludes[{Inc.Directive, Inc.Written}] = Inc.Resolved; + // There might be includes coming from disabled regions, record these for + // exclusion too. note that we don't have resolved paths for those. + for (const auto &Inc : BaselineScan->Includes) + ExistingIncludes.try_emplace({Inc.Directive, Inc.Written}); + // Calculate extra includes that needs to be inserted. + for (auto &Inc : ModifiedScan->Includes) { + auto It = ExistingIncludes.find({Inc.Directive, Inc.Written}); + // Include already present in the baseline preamble. Set resolved path and + // put into preamble includes. + if (It != ExistingIncludes.end()) { + Inc.Resolved = It->second.str(); + PP.PreambleIncludes.push_back(Inc); + continue; + } + // Include is new in the modified preamble. Inject it into the patch and + // use #line to set the presumed location to where it is spelled. + auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset); + Patch << llvm::formatv("#line {0}\n", LineCol.first); + Patch << llvm::formatv( + "#{0} {1}\n", spellingForIncDirective(Inc.Directive), Inc.Written); } - // Include is new in the modified preamble. Inject it into the patch and use - // #line to set the presumed location to where it is spelled. - auto LineCol = offsetToClangLineColumn(Modified.Contents, Inc.HashOffset); - Patch << llvm::formatv("#line {0}\n", LineCol.first); - Patch << llvm::formatv("#{0} {1}\n", spellingForIncDirective(Inc.Directive), - Inc.Written); } - Patch.flush(); - // FIXME: Handle more directives, e.g. define/undef. + if (DirectivesChanged) { + // We need to patch all the directives, since they are ordering dependent. + // e.g: + // #define BAR(X) NEW(X) // Newly introduced in Modified + // #define BAR(X) OLD(X) // Exists in the Baseline + // + // If we've patched only the first directive, the macro definition would've + // been wrong for the rest of the file, since patch is applied after the + // baseline preamble. For similar reasons conditional directives like ifdef + // also needs to be preserved. As scanning is not preceise and might pick + // directives from disabled regions. + for (const auto &TD : ModifiedScan->TextualDirectives) + Patch << TD.Text << '\n'; + } + dlog("Created preamble patch: {0}", Patch.str()); + Patch.flush(); return PP; } diff --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp --- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp +++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp @@ -27,6 +27,7 @@ #include using testing::Field; +using testing::IsEmpty; namespace clang { namespace clangd { @@ -181,6 +182,89 @@ ElementsAre(AllOf(Field(&Inclusion::Written, "\"a.h\""), Field(&Inclusion::Resolved, testPath("a.h"))))); } + +llvm::Optional createPatchedAST(llvm::StringRef Baseline, + llvm::StringRef Modified) { + auto BaselinePreamble = TestTU::withCode(Baseline).buildPreamble(); + if (!BaselinePreamble) { + ADD_FAILURE() << "Failed to build baseline preamble"; + return llvm::None; + } + + IgnoreDiagnostics Diags; + auto TU = TestTU::withCode(Modified); + auto CI = buildCompilerInvocation(TU.inputs(), Diags); + if (!CI) { + ADD_FAILURE() << "Failed to build compiler invocation"; + return llvm::None; + } + return ParsedAST::build(testPath("main.cpp"), TU.inputs(), std::move(CI), {}, + BaselinePreamble); +} + +TEST(PreamblePatchTest, Define) { + // BAR should be defined while parsing the AST. + llvm::StringLiteral Modified(R"cpp( + #define BAR(X) X + BAR(int y); + )cpp"); + + auto AST = createPatchedAST("", Modified); + ASSERT_TRUE(AST); + EXPECT_THAT(AST->getDiagnostics(), IsEmpty()); +} + +TEST(PreamblePatchTest, Undef) { + llvm::StringLiteral Baseline = "#define BAR(X) X"; + // BAR should be undefined while parsing the AST. + Annotations Modified(R"cpp( + #define BAR(X) X + #undef BAR + [[BAR]](int y); + )cpp"); + + auto AST = createPatchedAST(Baseline, Modified.code()); + ASSERT_TRUE(AST); + EXPECT_THAT(AST->getDiagnostics(), + ElementsAre(Field(&Diag::Range, Modified.range()))); +} + +TEST(PreamblePatchTest, OrderingPreserved) { + // Make sure undef goes before define in the patch. + llvm::StringLiteral Baseline = "#define BAR(X) X"; + llvm::StringRef Modified(R"cpp( + #undef BAR + #define BAR(X) X + BAR(int y); + )cpp"); + + auto AST = createPatchedAST(Baseline, Modified); + ASSERT_TRUE(AST); + EXPECT_THAT(AST->getDiagnostics(), IsEmpty()); +} + +TEST(PreamblePatchTest, SkippedBlocks) { + // None of the undef directives should be executed. + llvm::StringRef Modified(R"cpp( + #define BAR(X) X + #ifdef TEST + #undef BAR + #endif + #ifndef BAR + #undef BAR + #endif + #if defined(X) && X + #undef BAR + #elif defined(Y) && Y + #undef BAR + #endif + BAR(int y); + )cpp"); + + auto AST = createPatchedAST("", Modified); + ASSERT_TRUE(AST); + EXPECT_THAT(AST->getDiagnostics(), IsEmpty()); +} } // namespace } // namespace clangd } // namespace clang