diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -94,6 +94,7 @@ SemanticSelection.cpp SourceCode.cpp QueryDriverDatabase.cpp + TextMarks.cpp TidyProvider.cpp TUScheduler.cpp URI.cpp diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -289,6 +289,10 @@ /// - visiting decls is actually simple, so we don't hit the complicated /// cases that RAV mostly helps with (types, expressions, etc.) class DocumentOutline { + struct TextMarkSymbol { + DocumentSymbol docSym; + bool IsGroup; + }; // A DocumentSymbol we're constructing. // We use this instead of DocumentSymbol directly so that we can keep track // of the nodes we insert for macros. @@ -303,16 +307,98 @@ DocumentSymbol build() && { for (SymBuilder &C : Children) { Symbol.children.push_back(std::move(C).build()); + } + return std::move(Symbol); + } + + /// Fix up ranges so each parent's range contains all of its descendants. + void standardizeRanges() { + for (SymBuilder &C : Children) { + C.standardizeRanges(); // Expand range to ensure children nest properly, which editors expect. // This can fix some edge-cases in the AST, but is vital for macros. // A macro expansion "contains" AST node if it covers the node's primary // location, but it may not span the node's whole range. - Symbol.range.start = - std::min(Symbol.range.start, Symbol.children.back().range.start); - Symbol.range.end = - std::max(Symbol.range.end, Symbol.children.back().range.end); + Symbol.range.start = std::min(Symbol.range.start, C.Symbol.range.start); + Symbol.range.end = std::max(Symbol.range.end, C.Symbol.range.end); + } + // FIXME: We should also make sure nodes are properly sorted and nested + // here. + } + + /// Merge in `TextMarks` which are sorted in descending order by line. + /// + /// This assumes this `SymBuilder` has standardized ranges and is + /// recursively sorted in ascending order by range. + void mergeTextMarks(std::vector &TextMarks, bool IsRoot) { + size_t lastGroupIndex = 0; + size_t childIndex = 0; + bool hasOpenGroup = false; + while (!TextMarks.empty()) { + const auto &TextMark = TextMarks.back(); + // If we don't contain the mark, we can break early, since it must + // be in a parent node instead. Note that we can still have children + // that need to be added to the last TextMark group though. + if (!Symbol.range.contains(TextMark.docSym.range) && !IsRoot) + break; + + while (childIndex < Children.size()) { + auto &C = Children[childIndex]; + + // Next mark is contained in the child, we need to recurse to let the + // child handle it. + if (C.Symbol.range.contains(TextMark.docSym.range)) { + C.mergeTextMarks(TextMarks, /** IsRoot= */ false); + break; + } + // Text mark is after the child node, so we can process the child. + if (C.Symbol.range < TextMark.docSym.range) { + if (hasOpenGroup) { // Child belongs to the previous TextMark group. + Children[lastGroupIndex].Children.push_back(C); + Children[lastGroupIndex].Symbol.range.mergeWith(C.Symbol.range); + Children.erase(Children.begin() + childIndex); + } else { // Child doesn't belong to a TextMark group. Skip it. + childIndex++; + } + continue; + } + // Text mark is before the child node. + SymBuilder Node; + Node.EnclosingMacroLoc = C.EnclosingMacroLoc; + Node.Symbol = TextMark.docSym; + Children.insert(Children.begin() + childIndex, Node); + if (TextMark.IsGroup) { + lastGroupIndex = childIndex; + hasOpenGroup = true; + } else { // New text mark ends the previous group if one existed. + // FIXME: Consider letting non-groups nest inside a group. + hasOpenGroup = false; + } + TextMarks.pop_back(); + childIndex++; + break; + } + // Reached the end of our children but still have TextMark(s) left, + // they should just be normal children. + if (childIndex == Children.size()) { + SymBuilder Node; + Node.EnclosingMacroLoc = EnclosingMacroLoc; + Node.Symbol = TextMark.docSym; + Children.push_back(Node); + childIndex++; + TextMarks.pop_back(); + } + } + // If we have a group open but still have children remaining, they belong + // to that group instead. + if (hasOpenGroup && childIndex < Children.size()) { + while (childIndex < Children.size()) { + auto &C = Children[childIndex]; + Children[lastGroupIndex].Children.push_back(C); + Children[lastGroupIndex].Symbol.range.mergeWith(C.Symbol.range); + Children.erase(Children.begin() + childIndex); + } } - return std::move(Symbol); } // Add a symbol as a child of the current one. @@ -379,6 +465,19 @@ SymBuilder Root; for (auto &TopLevel : AST.getLocalTopLevelDecls()) traverseDecl(TopLevel, Root); + + // We need to fix up the ranges before adding TextMarks to be sure we add + // them in the correct spots. + Root.standardizeRanges(); + + const auto &SM = AST.getSourceManager(); + const auto &TextMarks = AST.getMarks().Marks; + std::vector TextMarkSyms; + for (auto it = TextMarks.rbegin(); it != TextMarks.rend(); ++it) { + TextMarkSyms.push_back({it->toSymbol(SM), it->IsGroup}); + } + Root.mergeTextMarks(TextMarkSyms, /** IsRoot= */ true); + return std::move(std::move(Root).build().children); } diff --git a/clang-tools-extra/clangd/ParsedAST.h b/clang-tools-extra/clangd/ParsedAST.h --- a/clang-tools-extra/clangd/ParsedAST.h +++ b/clang-tools-extra/clangd/ParsedAST.h @@ -25,6 +25,7 @@ #include "Diagnostics.h" #include "Headers.h" #include "Preamble.h" +#include "TextMarks.h" #include "index/CanonicalIncludes.h" #include "support/Path.h" #include "clang/Frontend/FrontendAction.h" @@ -101,6 +102,8 @@ /// Gets all macro references (definition, expansions) present in the main /// file, including those in the preamble region. const MainFileMacros &getMacros() const; + /// Gets all textual marks in the main file. + const MainFileMarks &getMarks() const; /// Tokens recorded while parsing the main file. /// (!) does not have tokens from the preamble. const syntax::TokenBuffer &getTokens() const { return Tokens; } @@ -121,7 +124,8 @@ std::shared_ptr Preamble, std::unique_ptr Clang, std::unique_ptr Action, syntax::TokenBuffer Tokens, - MainFileMacros Macros, std::vector LocalTopLevelDecls, + MainFileMacros Macros, MainFileMarks Marks, + std::vector LocalTopLevelDecls, llvm::Optional> Diags, IncludeStructure Includes, CanonicalIncludes CanonIncludes); @@ -144,6 +148,8 @@ /// All macro definitions and expansions in the main file. MainFileMacros Macros; + // Textual marks in the main file (e.g. pragma marks, TODOs). + MainFileMarks Marks; // Data, stored after parsing. None if AST was built with a stale preamble. llvm::Optional> Diags; // Top-level decls inside the current file. Not that this does not include diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -20,6 +20,7 @@ #include "IncludeFixer.h" #include "Preamble.h" #include "SourceCode.h" +#include "TextMarks.h" #include "TidyProvider.h" #include "index/CanonicalIncludes.h" #include "index/Index.h" @@ -423,6 +424,10 @@ std::make_unique(Clang->getSourceManager(), Macros)); + MainFileMarks Marks; + Clang->getPreprocessor().addPPCallbacks( + std::make_unique(Clang->getSourceManager(), Marks)); + // Copy over the includes from the preamble, then combine with the // non-preamble includes below. CanonicalIncludes CanonIncludes; @@ -484,7 +489,7 @@ } return ParsedAST(Inputs.Version, std::move(Preamble), std::move(Clang), std::move(Action), std::move(Tokens), std::move(Macros), - std::move(ParsedDecls), std::move(Diags), + std::move(Marks), std::move(ParsedDecls), std::move(Diags), std::move(Includes), std::move(CanonIncludes)); } @@ -524,6 +529,7 @@ } const MainFileMacros &ParsedAST::getMacros() const { return Macros; } +const MainFileMarks &ParsedAST::getMarks() const { return Marks; } std::size_t ParsedAST::getUsedBytes() const { auto &AST = getASTContext(); @@ -570,12 +576,14 @@ std::unique_ptr Clang, std::unique_ptr Action, syntax::TokenBuffer Tokens, MainFileMacros Macros, + MainFileMarks Marks, std::vector LocalTopLevelDecls, llvm::Optional> Diags, IncludeStructure Includes, CanonicalIncludes CanonIncludes) : Version(Version), Preamble(std::move(Preamble)), Clang(std::move(Clang)), Action(std::move(Action)), Tokens(std::move(Tokens)), - Macros(std::move(Macros)), Diags(std::move(Diags)), + Macros(std::move(Macros)), Marks(std::move(Marks)), + Diags(std::move(Diags)), LocalTopLevelDecls(std::move(LocalTopLevelDecls)), Includes(std::move(Includes)), CanonIncludes(std::move(CanonIncludes)) { Resolver = std::make_unique(getASTContext()); diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -194,6 +194,14 @@ bool contains(Range Rng) const { return start <= Rng.start && Rng.end <= end; } + + /// Expand this range to also contain `Rng`. + void mergeWith(Range Rng) { + if (Rng.start < start) + start = Rng.start; + if (end < Rng.end) + end = Rng.end; + } }; bool fromJSON(const llvm::json::Value &, Range &, llvm::json::Path); llvm::json::Value toJSON(const Range &); diff --git a/clang-tools-extra/clangd/TextMarks.h b/clang-tools-extra/clangd/TextMarks.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/TextMarks.h @@ -0,0 +1,73 @@ +//===--- TextMarks.h ---------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_TEXTMARKS_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_TEXTMARKS_H + +#include "AST.h" +#include "Protocol.h" +#include "SourceCode.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Lex/PPCallbacks.h" +#include + +namespace clang { +namespace clangd { +/// Represents a programmer specified mark/note, typically used to easily locate +/// or differentiate code. e.g. a `pragma mark`, a `TODO`. +/// +/// TextMarks occupy the entire line; it is not possible to have more than one +/// `TextMark` per line. +struct TextMark { + SourceLocation Loc; + std::string Text; + bool IsGroup; + DocumentSymbol toSymbol(const SourceManager &SM) const; +}; + +/// All `TextMarks` in the main file. Used to display the marks e.g. in the +/// document symbols list. +struct MainFileMarks { + /// Marks from the main file in order. + std::vector Marks; +}; + +/// Collect `#pragma mark` occurences in the main file. +class CollectPragmaMarks : public PPCallbacks { +public: + explicit CollectPragmaMarks(const SourceManager &SM, MainFileMarks &Out) + : SM(SM), Out(Out) {} + void PragmaMark(SourceLocation Loc, StringRef Trivia) override { + if (isInsideMainFile(Loc, SM)) { + StringRef Name = Trivia.trim(); + bool IsGroup = false; + // "-\s+" or "" after an initial trim. The former is + // considered a group, the latter just a mark. We need to include a name + // here since editors won't properly render the symbol otherwise. + StringRef MaybeGroupName = Name; + if (MaybeGroupName.consume_front("-") && + (MaybeGroupName.ltrim() != MaybeGroupName || + MaybeGroupName.empty())) { + Name = + MaybeGroupName.empty() ? "(unnamed group)" : MaybeGroupName.ltrim(); + IsGroup = true; + } else if (Name.empty()) { + Name = "(unnamed mark)"; + } + Out.Marks.emplace_back(TextMark{Loc, Name.str(), IsGroup}); + } + } + +private: + const SourceManager &SM; + MainFileMarks &Out; +}; +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_TEXTMARKS_H diff --git a/clang-tools-extra/clangd/TextMarks.cpp b/clang-tools-extra/clangd/TextMarks.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/TextMarks.cpp @@ -0,0 +1,28 @@ +//===--- TextMarks.cpp -------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TextMarks.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" + +namespace clang { +namespace clangd { + +DocumentSymbol TextMark::toSymbol(const SourceManager &SM) const { + DocumentSymbol Sym; + Sym.name = Text; + Sym.kind = SymbolKind::File; + Position Start = sourceLocToPosition(SM, Loc); + Position End = {Start.line + 1, 0}; + Sym.range = Range{Start, End}; + Sym.selectionRange = Range{Start, End}; + return Sym; +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp --- a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp @@ -1027,6 +1027,106 @@ AllOf(WithName("-pur"), WithKind(SymbolKind::Method)))))); } +TEST(DocumentSymbolsTest, PragmaMarkGroups) { + TestTU TU; + TU.ExtraArgs = {"-xobjective-c++", "-Wno-objc-root-class"}; + Annotations Main(R"cpp( + $DogDef[[@interface Dog + @end]] + + $DogImpl[[@implementation Dog + + + (id)sharedDoggo { return 0; } + + #pragma $Overrides[[mark - Overrides + + - (id)init { + return self; + } + - (void)bark {}]] + + #pragma $Specifics[[mark - Dog Specifics + + - (int)isAGoodBoy { + return 1; + }]] + @]]end // FIXME: Why doesn't this include the 'end'? + + #pragma $End[[mark - End +]] + )cpp"); + TU.Code = Main.code().str(); + EXPECT_THAT( + getSymbols(TU.build()), + ElementsAre( + AllOf(WithName("Dog"), SymRange(Main.range("DogDef"))), + AllOf(WithName("Dog"), SymRange(Main.range("DogImpl")), + Children(AllOf(WithName("+sharedDoggo"), + WithKind(SymbolKind::Method)), + AllOf(WithName("Overrides"), + SymRange(Main.range("Overrides")), + Children(AllOf(WithName("-init"), + WithKind(SymbolKind::Method)), + AllOf(WithName("-bark"), + WithKind(SymbolKind::Method)))), + AllOf(WithName("Dog Specifics"), + SymRange(Main.range("Specifics")), + Children(AllOf(WithName("-isAGoodBoy"), + WithKind(SymbolKind::Method)))))), + AllOf(WithName("End"), SymRange(Main.range("End"))))); +} + +TEST(DocumentSymbolsTest, PragmaMarkGroupsNoNesting) { + TestTU TU; + TU.ExtraArgs = {"-xobjective-c++", "-Wno-objc-root-class"}; + Annotations Main(R"cpp( + // FIXME: We miss the mark below, is it in the preamble instead? + // Unclear if it's worth supporting, likely very uncommon. + #pragma mark Helpers + void helpA(id obj) {} + + #pragma mark - + #pragma mark Core + + void coreMethod() {} + )cpp"); + TU.Code = Main.code().str(); + EXPECT_THAT( + getSymbols(TU.build()), + ElementsAre(AllOf(WithName("helpA")), AllOf(WithName("(unnamed group)")), + AllOf(WithName("Core")), AllOf(WithName("coreMethod")))); +} + +TEST(DocumentSymbolsTest, SymbolsAreSorted) { + TestTU TU; + TU.ExtraArgs = {"-xobjective-c++", "-Wno-objc-root-class"}; + Annotations Main(R"cpp( + @interface MYObject + @end + + void someFunctionAbove() {} + + @implementation MYObject + - (id)init { return self; } + + void someHelperFunction() {} + + - (void)retain {} + - (void)release {} + @end + )cpp"); + TU.Code = Main.code().str(); + EXPECT_THAT(getSymbols(TU.build()), + ElementsAre(AllOf(WithName("MYObject")), + AllOf(WithName("someFunctionAbove")), + // FIXME: This should be nested under MYObject below. + AllOf(WithName("someHelperFunction")), + AllOf(WithName("MYObject"), + Children(AllOf(WithName("-init")), + AllOf(WithName("-retain")), + AllOf(WithName("-release")))))); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h --- a/clang/include/clang/Lex/PPCallbacks.h +++ b/clang/include/clang/Lex/PPCallbacks.h @@ -492,6 +492,11 @@ Second->PragmaComment(Loc, Kind, Str); } + void PragmaMark(SourceLocation Loc, StringRef Trivia) override { + First->PragmaMark(Loc, Trivia); + Second->PragmaMark(Loc, Trivia); + } + void PragmaDetectMismatch(SourceLocation Loc, StringRef Name, StringRef Value) override { First->PragmaDetectMismatch(Loc, Name, Value);