diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -170,6 +170,8 @@ clangTidy clangdSupport + + clangPseudo ) if(CLANGD_TIDY_CHECKS) target_link_libraries(clangDaemon PRIVATE ${ALL_CLANG_TIDY_CHECKS}) diff --git a/clang-tools-extra/clangd/SemanticSelection.h b/clang-tools-extra/clangd/SemanticSelection.h --- a/clang-tools-extra/clangd/SemanticSelection.h +++ b/clang-tools-extra/clangd/SemanticSelection.h @@ -15,6 +15,7 @@ #include "ParsedAST.h" #include "Protocol.h" #include "llvm/Support/Error.h" +#include #include namespace clang { namespace clangd { @@ -29,6 +30,11 @@ /// This should include large scopes, preprocessor blocks etc. llvm::Expected> getFoldingRanges(ParsedAST &AST); +/// Returns a list of ranges whose contents might be collapsible in an editor. +/// This version uses the pseudoparser which does not require the AST. +llvm::Expected> +getFoldingRanges(const std::string &Code); + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp --- a/clang-tools-extra/clangd/SemanticSelection.cpp +++ b/clang-tools-extra/clangd/SemanticSelection.cpp @@ -11,6 +11,9 @@ #include "Protocol.h" #include "Selection.h" #include "SourceCode.h" +#include "clang-pseudo/Bracket.h" +#include "clang-pseudo/DirectiveTree.h" +#include "clang-pseudo/Token.h" #include "clang/AST/DeclBase.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" @@ -162,5 +165,46 @@ return collectFoldingRanges(SyntaxTree, AST.getSourceManager()); } +// FIXME(kirillbobyrev): Collect comments, PP conditional regions, includes and +// other code regions (e.g. public/private/protected sections of classes, +// control flow statement bodies). +// Related issue: https://github.com/clangd/clangd/issues/310 +llvm::Expected> +getFoldingRanges(const std::string &Code) { + auto OrigStream = clang::pseudo::lex(Code, clang::pseudo::genericLangOpts()); + + auto DirectiveStructure = clang::pseudo::DirectiveTree::parse(OrigStream); + clang::pseudo::chooseConditionalBranches(DirectiveStructure, OrigStream); + + // FIXME: Provide ranges in the disabled-PP regions as well. + auto Preprocessed = DirectiveStructure.stripDirectives(OrigStream); + + auto ParseableStream = cook(Preprocessed, clang::pseudo::genericLangOpts()); + pseudo::pairBrackets(ParseableStream); + + std::vector Result; + for (const auto &Tok : ParseableStream.tokens()) { + if (auto *Paired = Tok.pair()) { + // Process only token at the start of the range. Avoid ranges on a single + // line. + if (Tok.Line < Paired->Line) { + Position Start = offsetToPosition( + Code, + OrigStream.tokens()[Tok.OriginalIndex].text().data() - Code.data()); + Position End = offsetToPosition( + Code, OrigStream.tokens()[Paired->OriginalIndex].text().data() - + Code.data()); + FoldingRange FR; + FR.startLine = Start.line; + FR.startCharacter = Start.character + 1; + FR.endLine = End.line; + FR.endCharacter = End.character; + Result.push_back(FR); + } + } + } + return Result; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp --- a/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp @@ -265,6 +265,86 @@ } } +TEST(FoldingRangesPseudoParser, All) { + const char *Tests[] = { + R"cpp( + #define FOO int foo() {\ + int Variable = 42; \ + } + + // Do not generate folding range for braces within macro expansion. + FOO + + // Do not generate folding range within macro arguments. + #define FUNCTOR(functor) functor + void func() {[[ + FUNCTOR([](){}); + ]]} + + // Do not generate folding range with a brace coming from macro. + #define LBRACE { + void bar() LBRACE + int X = 42; + } + )cpp", + R"cpp( + void func() {[[ + int Variable = 100; + + if (Variable > 5) {[[ + Variable += 42; + ]]} else if (Variable++) + ++Variable; + else {[[ + Variable--; + ]]} + + // Do not generate FoldingRange for empty CompoundStmts. + for (;;) {} + + // If there are newlines between {}, we should generate one. + for (;;) {[[ + + ]]} + ]]} + )cpp", + R"cpp( + class Foo {[[ + public: + Foo() {[[ + int X = 1; + ]]} + + private: + int getBar() {[[ + return 42; + ]]} + + // Braces are located at the same line: no folding range here. + void getFooBar() { } + ]]}; + )cpp", + R"cpp( + // Range boundaries on escaped newlines. + class Foo \ + \ + {[[ \ + public: + Foo() {[[\ + int X = 1; + ]]} \ + ]]}; + )cpp", + }; + for (const char *Test : Tests) { + auto T = Annotations(Test); + EXPECT_THAT( + gatherFoldingRanges(llvm::cantFail(getFoldingRanges(T.code().str()))), + UnorderedElementsAreArray(T.ranges())) + << Test; + } +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/Token.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Token.h @@ -67,6 +67,8 @@ uint8_t Indent = 0; /// Flags have some meaning defined by the function that produced this stream. uint8_t Flags = 0; + /// Index into the original token stream (as raw-lexed from the source code). + Index OriginalIndex = Invalid; // Helpers to get/set Flags based on `enum class`. template bool flag(T Mask) const { return Flags & uint8_t{static_cast>(Mask)}; @@ -96,7 +98,7 @@ /// If this token is a paired bracket, the offset of the pair in the stream. int32_t Pair = 0; }; -static_assert(sizeof(Token) <= sizeof(char *) + 20, "Careful with layout!"); +static_assert(sizeof(Token) <= sizeof(char *) + 24, "Careful with layout!"); llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Token &); /// A half-open range of tokens within a stream. diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt --- a/clang-tools-extra/pseudo/lib/CMakeLists.txt +++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt @@ -17,3 +17,7 @@ clangLex clangPseudoGrammar ) + + target_include_directories(clangPseudo INTERFACE + $ + ) diff --git a/clang-tools-extra/pseudo/lib/Lex.cpp b/clang-tools-extra/pseudo/lib/Lex.cpp --- a/clang-tools-extra/pseudo/lib/Lex.cpp +++ b/clang-tools-extra/pseudo/lib/Lex.cpp @@ -26,6 +26,8 @@ TokenStream Result; clang::Token CT; + // Index into the token stream of original source code. + Token::Index TokenIndex = 0; unsigned LastOffset = 0; unsigned Line = 0; unsigned Indent = 0; @@ -66,6 +68,7 @@ if (CT.needsCleaning() || CT.hasUCN()) Tok.setFlag(LexFlags::NeedsCleaning); + Tok.OriginalIndex = TokenIndex++; Result.push(Tok); LastOffset = Offset; } diff --git a/clang-tools-extra/pseudo/unittests/TokenTest.cpp b/clang-tools-extra/pseudo/unittests/TokenTest.cpp --- a/clang-tools-extra/pseudo/unittests/TokenTest.cpp +++ b/clang-tools-extra/pseudo/unittests/TokenTest.cpp @@ -31,6 +31,10 @@ return arg.Line == (unsigned)Line && arg.Indent == (unsigned)Indent; } +MATCHER_P(originalIndex, index, "") { + return arg.OriginalIndex == (Token::Index)index; +} + TEST(TokenTest, Lex) { LangOptions Opts; std::string Code = R"cpp( @@ -105,20 +109,23 @@ Raw.tokens(), ElementsAre(AllOf(token("one_\\\ntoken", tok::raw_identifier), hasFlag(LexFlags::StartsPPLine), - hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0)), + hasFlag(LexFlags::NeedsCleaning), lineIndent(1, 0), + originalIndex(0)), AllOf(token("two", tok::raw_identifier), hasFlag(LexFlags::StartsPPLine), - Not(hasFlag(LexFlags::NeedsCleaning))), + Not(hasFlag(LexFlags::NeedsCleaning)), + originalIndex(1)), AllOf(token("\\\ntokens", tok::raw_identifier), Not(hasFlag(LexFlags::StartsPPLine)), - hasFlag(LexFlags::NeedsCleaning)))); + hasFlag(LexFlags::NeedsCleaning), originalIndex(2)))); TokenStream Cooked = cook(Raw, Opts); EXPECT_THAT( Cooked.tokens(), - ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0)), - token("two", tok::identifier), - token("tokens", tok::identifier))); + ElementsAre(AllOf(token("one_token", tok::identifier), lineIndent(1, 0), + originalIndex(0)), + AllOf(token("two", tok::identifier), originalIndex(1)), + AllOf(token("tokens", tok::identifier), originalIndex(2)))); } TEST(TokenTest, EncodedCharacters) { @@ -182,13 +189,14 @@ )cpp"; TokenStream Cook = cook(lex(Code, Opts), Opts); TokenStream Split = stripComments(Cook); - EXPECT_THAT(Split.tokens(), ElementsAreArray({ - token(">", tok::greater), - token(">", tok::greater), - token(">", tok::greater), - token(">", tok::greater), - token(">>=", tok::greatergreaterequal), - })); + EXPECT_THAT(Split.tokens(), + ElementsAre(AllOf(token(">", tok::greater), originalIndex(0)), + AllOf(token(">", tok::greater), originalIndex(0)), + // Token 1 and 2 are comments. + AllOf(token(">", tok::greater), originalIndex(3)), + AllOf(token(">", tok::greater), originalIndex(3)), + AllOf(token(">>=", tok::greatergreaterequal), + originalIndex(4)))); } TEST(TokenTest, DropComments) { @@ -199,13 +207,16 @@ )cpp"; TokenStream Raw = cook(lex(Code, Opts), Opts); TokenStream Stripped = stripComments(Raw); - EXPECT_THAT(Raw.tokens(), - ElementsAreArray( - {token("// comment", tok::comment), token("int", tok::kw_int), - token("/*abc*/", tok::comment), token(";", tok::semi)})); - - EXPECT_THAT(Stripped.tokens(), ElementsAreArray({token("int", tok::kw_int), - token(";", tok::semi)})); + EXPECT_THAT( + Raw.tokens(), + ElementsAre(AllOf(token("// comment", tok::comment), originalIndex(0)), + AllOf(token("int", tok::kw_int), originalIndex(1)), + AllOf(token("/*abc*/", tok::comment), originalIndex(2)), + AllOf(token(";", tok::semi), originalIndex(3)))); + + EXPECT_THAT(Stripped.tokens(), + ElementsAre(AllOf(token("int", tok::kw_int), originalIndex(1)), + AllOf(token(";", tok::semi), originalIndex(3)))); } } // namespace