diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -62,6 +62,7 @@
   Quality.cpp
   RIFF.cpp
   Selection.cpp
+  SemanticHighlight.cpp
   SourceCode.cpp
   Threading.cpp
   Trace.cpp
diff --git a/clang-tools-extra/clangd/SemanticHighlight.h b/clang-tools-extra/clangd/SemanticHighlight.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/SemanticHighlight.h
@@ -0,0 +1,67 @@
+//===--- SemanticHighlight.h - Generating highlights from the AST
+//-----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Code for collecting semantic symbols from the C++ AST using the
+// RecursiveASTVisitor
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SEMANTICHIGHLIGHT_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SEMANTICHIGHLIGHT_H
+
+#include "AST.h"
+#include "Headers.h"
+#include "Protocol.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace clangd {
+
+// SemanticScope identifies the kind of symbol being highlighted. Its integer
+// value is the index of the matching entry returned by getSemanticScopes().
+enum class SemanticScope : int {
+  VariableDeclaration = 0,
+  FunctionDeclaration = 1,
+};
+
+// Contains all information needed for highlighting a symbol.
+struct SemanticToken {
+  SemanticToken() = default;
+  SemanticToken(SemanticScope Scope, Position StartPosition, unsigned int Len)
+      : Scope(Scope), StartPosition(StartPosition), Len(Len) {}
+  // Kind of symbol. Initialized so that a default-constructed token never
+  // carries an indeterminate value into operator== or toJSON.
+  SemanticScope Scope = SemanticScope::VariableDeclaration;
+  // Position of the first character of the token.
+  Position StartPosition;
+  // Length of the token.
+  unsigned int Len = 0;
+};
+
+bool operator==(const SemanticToken &Lhs, const SemanticToken &Rhs);
+bool operator!=(const SemanticToken &Lhs, const SemanticToken &Rhs);
+
+// Contains all highlights in a single line.
+struct LineHighlight {
+  LineHighlight(unsigned int Line = 0, std::vector<SemanticToken> Tokens = {})
+      : Line(Line), Tokens(Tokens) {}
+  // Line number shared by every token in Tokens.
+  unsigned int Line;
+  std::vector<SemanticToken> Tokens;
+};
+
+// Serializes a line as {"line": <number>, "tokens": <base64 string>}.
+llvm::json::Value toJSON(const LineHighlight &Highlight);
+bool operator==(const LineHighlight &Lhs, const LineHighlight &Rhs);
+bool operator!=(const LineHighlight &Lhs, const LineHighlight &Rhs);
+
+// Returns semantic highlights for the AST, grouped per line. Lines and the
+// symbols inside each LineHighlight appear in AST traversal order, which is
+// expected to be ascending by line and then by column number.
+// NOTE(review): no explicit sort enforces that order - confirm it holds.
+std::vector<LineHighlight> getASTHighlights(ASTContext &AST);
+// Returns the scope lists; the entry at index I belongs to the SemanticScope
+// whose integer value is I.
+std::vector<std::vector<std::string>> getSemanticScopes();
+
+} // namespace clangd
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/clangd/SemanticHighlight.cpp b/clang-tools-extra/clangd/SemanticHighlight.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/SemanticHighlight.cpp
@@ -0,0 +1,151 @@
+#include "SemanticHighlight.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+// Collects the semantic symbols (variable and function declarations) written
+// in the main file of an ASTContext, in AST traversal order.
+class SemanticSymbolASTCollector
+    : public RecursiveASTVisitor<SemanticSymbolASTCollector> {
+  std::vector<SemanticToken> Symbols;
+  const ASTContext &AST;
+  const SourceManager &SM;
+
+public:
+  explicit SemanticSymbolASTCollector(const ASTContext &AST)
+      : AST(AST), SM(AST.getSourceManager()) {}
+
+  // Returns a copy of the tokens collected so far.
+  std::vector<SemanticToken> getSymbols() { return Symbols; }
+
+  bool VisitVarDecl(VarDecl *Var) {
+    addSymbol(Var, SemanticScope::VariableDeclaration);
+    return true;
+  }
+
+  bool VisitFunctionDecl(FunctionDecl *Func) {
+    addSymbol(Func, SemanticScope::FunctionDeclaration);
+    return true;
+  }
+
+private:
+  // Records the name token of D with the given Scope, if D has one that can
+  // be highlighted.
+  void addSymbol(const NamedDecl *D, SemanticScope Scope) {
+    const SourceLocation Loc = D->getLocation();
+    // Skip locations that do not denote a token written in the main file:
+    //  - invalid locations,
+    //  - macro locations, which sourceLocToPosition and MeasureTokenLength
+    //    can resolve to different places (spelling vs. expansion),
+    //  - other files, whose line numbers would be mixed with the main file's
+    //    because a LineHighlight carries no file information.
+    // FIXME: highlight declarations whose name is spelled inside a macro.
+    if (Loc.isInvalid() || Loc.isMacroID() || !SM.isWrittenInMainFile(Loc))
+      return;
+
+    // Unnamed declarations, e.g. the parameter in `void f(int);`, have no
+    // name token. Measuring at their location would pick up whatever token
+    // happens to be there.
+    if (D->getDeclName().isIdentifier() && D->getName().empty())
+      return;
+
+    const unsigned Len =
+        clang::Lexer::MeasureTokenLength(Loc, SM, AST.getLangOpts());
+    if (Len == 0) {
+      // Don't add symbols that don't have any length.
+      return;
+    }
+
+    Symbols.push_back(SemanticToken(Scope, sourceLocToPosition(SM, Loc), Len));
+  }
+};
+
+} // namespace
+
+bool operator==(const SemanticToken &Lhs, const SemanticToken &Rhs) {
+  return Lhs.Scope == Rhs.Scope && Lhs.StartPosition == Rhs.StartPosition &&
+         Lhs.Len == Rhs.Len;
+}
+bool operator!=(const SemanticToken &Lhs, const SemanticToken &Rhs) {
+  return !(Lhs == Rhs);
+}
+
+bool operator==(const LineHighlight &Lhs, const LineHighlight &Rhs) {
+  return Lhs.Line == Rhs.Line && Lhs.Tokens == Rhs.Tokens;
+}
+bool operator!=(const LineHighlight &Lhs, const LineHighlight &Rhs) {
+  return !(Lhs == Rhs);
+}
+
+// Encode binary data into base64.
+// This was copied from compiler-rt/lib/fuzzer/FuzzerUtil.cpp.
+// FIXME: Factor this out into llvm/Support?
+// Returns the padded base64 text for the bytes in U.
+// Bytes are widened through unsigned char: plain char may be signed, and a
+// sign-extended byte >= 0x80 would corrupt the neighbouring 6-bit groups.
+static std::string encodeBase64(const llvm::SmallVectorImpl<char> &U) {
+  static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                              "abcdefghijklmnopqrstuvwxyz"
+                              "0123456789+/";
+  auto Byte = [&U](size_t Idx) -> uint32_t {
+    return static_cast<unsigned char>(U[Idx]);
+  };
+  std::string Res;
+  // Every started group of 3 input bytes produces 4 output characters.
+  Res.reserve(((U.size() + 2) / 3) * 4);
+  size_t I;
+  for (I = 0; I + 2 < U.size(); I += 3) {
+    uint32_t X = (Byte(I) << 16) | (Byte(I + 1) << 8) | Byte(I + 2);
+    Res += Table[(X >> 18) & 63];
+    Res += Table[(X >> 12) & 63];
+    Res += Table[(X >> 6) & 63];
+    Res += Table[X & 63];
+  }
+  if (I + 1 == U.size()) {
+    // One trailing byte: two significant characters, two padding characters.
+    uint32_t X = Byte(I) << 16;
+    Res += Table[(X >> 18) & 63];
+    Res += Table[(X >> 12) & 63];
+    Res += "==";
+  } else if (I + 2 == U.size()) {
+    // Two trailing bytes: three significant characters, one padding character.
+    uint32_t X = (Byte(I) << 16) | (Byte(I + 1) << 8);
+    Res += Table[(X >> 18) & 63];
+    Res += Table[(X >> 12) & 63];
+    Res += Table[(X >> 6) & 63];
+    Res += "=";
+  }
+  return Res;
+}
+
+// Writes I to OS as 4 bytes in big-endian order.
+static void write32be(uint32_t I, llvm::raw_ostream &OS) {
+  char Buf[4];
+  llvm::support::endian::write32be(Buf, I);
+  OS.write(Buf, sizeof(Buf));
+}
+
+// Writes I to OS as 2 bytes in big-endian order.
+static void write16be(uint16_t I, llvm::raw_ostream &OS) {
+  char Buf[2];
+  llvm::support::endian::write16be(Buf, I);
+  OS.write(Buf, sizeof(Buf));
+}
+
+// Serializes Highlight as {"line": <line number>, "tokens": <base64 string>}.
+llvm::json::Value toJSON(const LineHighlight &Highlight) {
+  // Each token takes 8 bytes, so the inline storage covers 16 tokens per line.
+  llvm::SmallVector<char, 128> BinaryHighlights;
+  llvm::raw_svector_ostream OS(BinaryHighlights);
+
+  for (const SemanticToken &Token : Highlight.Tokens) {
+    // Each token consists of 2 32 bit integers. The first integer is the
+    // start column of the token. The second integer's first 16 bits are the
+    // length of the token. The rest of the second integer is the scope index
+    // of the token.
+    write32be(Token.StartPosition.character, OS);
+    // The length field is only 16 bits wide: clamp instead of letting an
+    // oversized length wrap around to a small value.
+    write16be(static_cast<uint16_t>(Token.Len > 0xFFFFu ? 0xFFFFu : Token.Len),
+              OS);
+    write16be(static_cast<uint16_t>(Token.Scope), OS);
+  }
+
+  return llvm::json::Object{{"line", Highlight.Line},
+                            {"tokens", encodeBase64(BinaryHighlights)}};
+}
+
+// Returns one scope list per SemanticScope value, indexed by the enumerator's
+// integer value (0: variable declaration, 1: function declaration).
+std::vector<std::vector<std::string>> getSemanticScopes() {
+  return {{"variable"}, {"entity.name.function"}};
+}
+
+// Collects the symbols of AST and groups consecutive symbols that share a line
+// into one LineHighlight. Symbols keep the order in which they were collected.
+std::vector<LineHighlight> getASTHighlights(ASTContext &AST) {
+  SemanticSymbolASTCollector Collector(AST);
+  Collector.TraverseAST(AST);
+  const auto Symbols = Collector.getSymbols();
+  std::vector<LineHighlight> Lines;
+  // Split the vector of symbols into lines
+  for (const auto &Symbol : Symbols) {
+    const auto Line = static_cast<unsigned int>(Symbol.StartPosition.line);
+    // Open a new group whenever the line differs from the previous symbol's.
+    if (Lines.empty() || Lines.back().Line != Line)
+      Lines.emplace_back(Line);
+
+    Lines.back().Tokens.push_back(Symbol);
+  }
+
+  return Lines;
+}
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -53,6 +53,7 @@
   RenameTests.cpp
   RIFFTests.cpp
   SelectionTests.cpp
+  SemanticHighlightTests.cpp
   SerializationTests.cpp
   SourceCodeTests.cpp
   SymbolCollectorTests.cpp
diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightTests.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightTests.cpp
@@ -0,0 +1,76 @@
+//===-- SemanticHighlightTests.cpp - Semantic highlighting tests
+//------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Annotations.h"
+#include "ClangdUnit.h"
+#include "Protocol.h"
+#include "SemanticHighlight.h"
+#include "SourceCode.h"
+#include "TestTU.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using ::testing::ElementsAreArray;
+
+// Builds a Position from a line number and a character offset.
+Position createPosition(int Line, int Character) {
+  Position Result;
+  Result.line = Line;
+  Result.character = Character;
+  return Result;
+}
+
+TEST(SemanticSymbolASTCollector, GetBeginningOfIdentifier) {
+  // Every $name[[...]] range marks a declaration that must be highlighted.
+  // The member of A is deliberately left unannotated.
+  const std::string Code = R"cpp(
+    struct A {
+      double SomeMember;
+    };
+    void $foo[[foo]](int $a[[a]]) {
+      auto $vlvn[[VeryLongVariableName]] = 12312;
+      A $aa[[aa]];
+    }
+  )cpp";
+  Annotations Test(Code);
+
+  // Expected token for an annotated range: it starts at the range start and
+  // is as long as the range (each range sits on a single line).
+  auto Token = [](SemanticScope Scope, const auto &R) {
+    return SemanticToken(Scope,
+                         createPosition(R.start.line, R.start.character),
+                         R.end.character - R.start.character);
+  };
+
+  const auto Foo = Test.range("foo");
+  const auto A = Test.range("a");
+  const auto VeryLong = Test.range("vlvn");
+  const auto AA = Test.range("aa");
+
+  // One LineHighlight per source line, tokens in column order.
+  const std::vector<LineHighlight> Expected = {
+      LineHighlight(Foo.start.line,
+                    {Token(SemanticScope::FunctionDeclaration, Foo),
+                     Token(SemanticScope::VariableDeclaration, A)}),
+      LineHighlight(VeryLong.start.line,
+                    {Token(SemanticScope::VariableDeclaration, VeryLong)}),
+      LineHighlight(AA.start.line,
+                    {Token(SemanticScope::VariableDeclaration, AA)})};
+
+  auto AST = TestTU::withCode(Test.code()).build();
+  const auto Actual = getASTHighlights(AST.getASTContext());
+  EXPECT_THAT(Actual, ElementsAreArray(Expected));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang