diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -97,6 +97,7 @@
   SemanticHighlighting.cpp
   SemanticSelection.cpp
   SourceCode.cpp
+  SymbolDocumentation.cpp
   QueryDriverDatabase.cpp
   TidyProvider.cpp
   TUScheduler.cpp
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.h b/clang-tools-extra/clangd/SymbolDocumentation.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/SymbolDocumentation.h
@@ -0,0 +1,109 @@
+//===--- SymbolDocumentation.h ==---------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Class to parse doxygen comments into a flat structure for consumption
+// in e.g. Hover and Code Completion
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Comment.h"
+#include "clang/AST/CommentVisitor.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace clang {
+namespace clangd {
+
+/// Documentation of a single parameter: its name and its description.
+/// \tparam String Storage type of both fields, either std::string or
+/// llvm::StringRef.
+template <class String> struct ParameterDocumentation {
+  /// Name of the documented parameter.
+  String Name;
+  /// Text describing the parameter.
+  String Description;
+
+  ParameterDocumentation<llvm::StringRef> toRef() const;
+  ParameterDocumentation<std::string> toOwned() const;
+};
+
+using ParameterDocumentationRef = ParameterDocumentation<llvm::StringRef>;
+using ParameterDocumentationOwned = ParameterDocumentation<std::string>;
+
+/// @brief Represents a parsed doxygen comment.
+/// @details Currently there's special handling for the "brief", "param",
+/// "return", "note" and "warning" commands. The content of all other
+/// paragraphs will be appended to the #Description field.
+/// If you're only interested in the full comment, but with comment
+/// markers stripped, use the #CommentText field.
+/// \tparam String When built from a declaration, we're building the strings
+/// by ourselves, so in this case String==std::string.
+/// However, when storing the contents of this class in the index, we need to
+/// use llvm::StringRef. To convert between std::string and llvm::StringRef
+/// versions of this class, use toRef() and toOwned().
+template <class String> class SymbolDocumentation {
+public:
+  friend class CommentToSymbolDocumentation;
+
+  static SymbolDocumentation descriptionOnly(String &&Description) {
+    SymbolDocumentation Doc;
+    Doc.Description = Description;
+    Doc.CommentText = Description;
+    return Doc;
+  }
+
+  /// Constructs with all fields as empty strings/vectors.
+  SymbolDocumentation() = default;
+
+  SymbolDocumentation<llvm::StringRef> toRef() const;
+  SymbolDocumentation<std::string> toOwned() const;
+
+  bool empty() const { return CommentText.empty(); }
+
+  /// Paragraph of the "brief" command.
+  String Brief;
+
+  /// Paragraph of the "return" command.
+  String Returns;
+
+  /// Paragraph(s) of the "note" command(s)
+  llvm::SmallVector<String> Notes;
+  /// Paragraph(s) of the "warning" command(s)
+  llvm::SmallVector<String> Warnings;
+
+  /// Parsed paragraph(s) of the "param" command(s)
+  llvm::SmallVector<ParameterDocumentation<String>> Parameters;
+
+  /// All the paragraphs we don't have any special handling for,
+  /// e.g. "details".
+  String Description;
+
+  /// The full documentation comment with comment markers stripped.
+  /// See clang::RawComment::getFormattedText() for the detailed
+  /// explanation of how the comment text is transformed.
+  String CommentText;
+};
+
+using SymbolDocumentationOwned = SymbolDocumentation<std::string>;
+using SymbolDocumentationRef = SymbolDocumentation<llvm::StringRef>;
+
+/// @param RC the comment to parse
+/// @param D the declaration that \p RC belongs to
+/// @return parsed doxygen documentation.
+SymbolDocumentationOwned
+parseDoxygenComment(const RawComment &RC, const ASTContext &Ctx, const Decl *D);
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_H
diff --git a/clang-tools-extra/clangd/SymbolDocumentation.cpp b/clang-tools-extra/clangd/SymbolDocumentation.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/SymbolDocumentation.cpp
@@ -0,0 +1,217 @@
+//===--- SymbolDocumentation.cpp ==-------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolDocumentation.h"
+#include "clang/AST/CommentVisitor.h"
+#include "llvm/Support/JSON.h"
+
+namespace clang {
+namespace clangd {
+
+/// Replaces \p Str with a repaired copy if it is not valid UTF-8.
+static void ensureUTF8(std::string &Str) {
+  if (!llvm::json::isUTF8(Str))
+    Str = llvm::json::fixUTF8(Str);
+}
+
+/// Applies ensureUTF8() to every string in \p Strings.
+static void ensureUTF8(llvm::MutableArrayRef<std::string> Strings) {
+  for (auto &Str : Strings) {
+    ensureUTF8(Str);
+  }
+}
+
+/// Appends the text of a comment block to a string. Inline commands rendered
+/// as monospaced, bold or emphasized are wrapped in "`", "**" or "*".
+class BlockCommentToString
+    : public comments::ConstCommentVisitor<BlockCommentToString> {
+public:
+  BlockCommentToString(std::string &Out, const ASTContext &Ctx)
+      : Out(Out), Ctx(Ctx) {}
+
+  void visitParagraphComment(const comments::ParagraphComment *C) {
+    for (const auto *Child = C->child_begin(); Child != C->child_end();
+         ++Child) {
+      visit(*Child);
+    }
+  }
+
+  void visitBlockCommandComment(const comments::BlockCommandComment *B) {
+    Out << (B->getCommandMarker() == (comments::CommandMarkerKind::CMK_At)
+                ? '@'
+                : '\\')
+        << B->getCommandName(Ctx.getCommentCommandTraits());
+
+    visit(B->getParagraph());
+  }
+
+  void visitTextComment(const comments::TextComment *C) {
+    // If this is the very first node, the paragraph has no doxygen command,
+    // so there will be a leading space -> Trim it
+    // Otherwise just trim trailing space
+    if (Out.str().empty())
+      Out << C->getText().trim();
+    else
+      Out << C->getText().rtrim();
+  }
+
+  void visitInlineCommandComment(const comments::InlineCommandComment *C) {
+    const std::string SurroundWith = [C] {
+      switch (C->getRenderKind()) {
+      case comments::InlineCommandComment::RenderKind::RenderMonospaced:
+        return "`";
+      case comments::InlineCommandComment::RenderKind::RenderBold:
+        return "**";
+      case comments::InlineCommandComment::RenderKind::RenderEmphasized:
+        return "*";
+      default:
+        return "";
+      }
+    }();
+
+    Out << " " << SurroundWith;
+    for (unsigned I = 0; I < C->getNumArgs(); ++I) {
+      Out << C->getArgText(I);
+    }
+    Out << SurroundWith;
+  }
+
+private:
+  llvm::raw_string_ostream Out;
+  const ASTContext &Ctx;
+};
+
+/// Sorts the blocks of a parsed comment into a SymbolDocumentationOwned.
+class CommentToSymbolDocumentation
+    : public comments::ConstCommentVisitor<CommentToSymbolDocumentation> {
+public:
+  CommentToSymbolDocumentation(const RawComment &RC, const ASTContext &Ctx,
+                               const Decl *D, SymbolDocumentationOwned &Doc)
+      : FullComment(RC.parse(Ctx, nullptr, D)), Output(Doc), Ctx(Ctx) {
+
+    Doc.CommentText =
+        RC.getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
+
+    for (auto *Block : FullComment->getBlocks()) {
+      visit(Block);
+    }
+  }
+
+  void visitBlockCommandComment(const comments::BlockCommandComment *B) {
+    const llvm::StringRef CommandName =
+        B->getCommandName(Ctx.getCommentCommandTraits());
+
+    // Visit B->getParagraph() for commands that we have special fields for,
+    // so that the command name won't be included in the string.
+    // Otherwise, we want to keep the command name, so visit B itself.
+    if (CommandName == "brief") {
+      BlockCommentToString(Output.Brief, Ctx).visit(B->getParagraph());
+    } else if (CommandName == "return" || CommandName == "returns") {
+      BlockCommentToString(Output.Returns, Ctx).visit(B->getParagraph());
+    } else if (CommandName == "warning") {
+      BlockCommentToString(Output.Warnings.emplace_back(), Ctx)
+          .visit(B->getParagraph());
+    } else if (CommandName == "note") {
+      BlockCommentToString(Output.Notes.emplace_back(), Ctx)
+          .visit(B->getParagraph());
+    } else {
+      // Blocks in the description are separated by an empty line.
+      if (!Output.Description.empty())
+        Output.Description += "\n\n";
+      BlockCommentToString(Output.Description, Ctx).visit(B);
+    }
+  }
+
+  void visitParagraphComment(const comments::ParagraphComment *P) {
+    BlockCommentToString(Output.Description, Ctx).visit(P);
+  }
+
+  void visitParamCommandComment(const comments::ParamCommandComment *P) {
+    if (P->hasParamName() && P->hasNonWhitespaceParagraph()) {
+      ParameterDocumentationOwned Doc;
+      Doc.Name = P->getParamNameAsWritten().str();
+      BlockCommentToString(Doc.Description, Ctx).visit(P->getParagraph());
+      Output.Parameters.push_back(std::move(Doc));
+    }
+  }
+
+private:
+  comments::FullComment *FullComment;
+  SymbolDocumentationOwned &Output;
+  const ASTContext &Ctx;
+};
+
+SymbolDocumentationOwned parseDoxygenComment(const RawComment &RC,
+                                             const ASTContext &Ctx,
+                                             const Decl *D) {
+  SymbolDocumentationOwned Doc;
+  CommentToSymbolDocumentation(RC, Ctx, D, Doc);
+
+  // Clang requires source to be UTF-8, but doesn't enforce this in comments.
+  ensureUTF8(Doc.Brief);
+  ensureUTF8(Doc.Returns);
+
+  ensureUTF8(Doc.Notes);
+  ensureUTF8(Doc.Warnings);
+
+  for (auto &Param : Doc.Parameters) {
+    ensureUTF8(Param.Name);
+    ensureUTF8(Param.Description);
+  }
+
+  ensureUTF8(Doc.Description);
+  ensureUTF8(Doc.CommentText);
+
+  return Doc;
+}
+
+template struct ParameterDocumentation<std::string>;
+template struct ParameterDocumentation<llvm::StringRef>;
+
+/// Copies \p In into a SymbolDocumentation whose strings are of type StrOut.
+template <class StrOut, class StrIn>
+SymbolDocumentation<StrOut> convert(const SymbolDocumentation<StrIn> &In) {
+  SymbolDocumentation<StrOut> Doc;
+
+  Doc.Brief = In.Brief;
+  Doc.Returns = In.Returns;
+
+  Doc.Notes.reserve(In.Notes.size());
+  for (const auto &Note : In.Notes)
+    Doc.Notes.emplace_back(Note);
+
+  Doc.Warnings.reserve(In.Warnings.size());
+  for (const auto &Warning : In.Warnings)
+    Doc.Warnings.emplace_back(Warning);
+
+  Doc.Parameters.reserve(In.Parameters.size());
+  for (const auto &ParamDoc : In.Parameters)
+    Doc.Parameters.emplace_back(ParameterDocumentation<StrOut>{
+        StrOut(ParamDoc.Name), StrOut(ParamDoc.Description)});
+
+  Doc.Description = In.Description;
+  Doc.CommentText = In.CommentText;
+
+  return Doc;
+}
+
+/// The result refers to the strings of *this and must not outlive it.
+template <> SymbolDocumentationRef SymbolDocumentationOwned::toRef() const {
+  return convert<llvm::StringRef>(*this);
+}
+
+/// The result owns copies of all strings referenced by *this.
+template <> SymbolDocumentationOwned SymbolDocumentationRef::toOwned() const {
+  return convert<std::string>(*this);
+}
+
+template class SymbolDocumentation<std::string>;
+template class SymbolDocumentation<llvm::StringRef>;
+
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
--- a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeCompletionStrings.h"
+#include "SymbolDocumentationMatchers.h"
 #include "TestTU.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "gmock/gmock.h"
@@ -65,6 +66,90 @@
getDeclComment(AST.getASTContext(), findDecl(AST, "X"))); } +TEST_F(CompletionStringTest, DoxygenParsing) { + struct { + const char *const Code; + const std::function ExpectedBuilder; + } Cases[] = { + {R"cpp( + // Hello world + void foo(); + )cpp", + [](SymbolDocumentationOwned &Doc) { Doc.Description = "Hello world"; }}, + {R"cpp( + /*! + * \brief brief + * \details details + */ + void foo(); + )cpp", + [](SymbolDocumentationOwned &Doc) { + Doc.Brief = "brief"; + Doc.Description = "\\details details"; + }}, + {R"cpp( + /** + * @brief brief + * @details details + * @see somewhere else + */ + void foo(); + )cpp", + [](SymbolDocumentationOwned &Doc) { + Doc.Brief = "brief"; + Doc.Description = "@details details\n\n@see somewhere else"; + }}, + {R"cpp( + /*! + * @brief brief + * @details details + * @param foo foodoc + * @throws ball at hoop + * @note note1 + * @warning warning1 + * @note note2 + * @warning warning2 + * @param bar bardoc + * @return something + */ + void foo(); + )cpp", + [](SymbolDocumentationOwned &Doc) { + Doc.Brief = "brief"; + Doc.Description = "@details details\n\n@throws ball at hoop"; + Doc.Parameters = {{"foo", "foodoc"}, {"bar", "bardoc"}}; + Doc.Warnings = {"warning1", "warning2"}; + Doc.Notes = {"note1", "note2"}; + Doc.Returns = "something"; + }}, + {R"cpp( + /// @brief Here's \b bold \e italic and \p code + int foo; + )cpp", + [](SymbolDocumentationOwned &Doc) { + Doc.Brief = "Here's **bold** *italic* and `code`"; + }}}; + + for (const auto &Case : Cases) { + SCOPED_TRACE(Case.Code); + + auto TU = TestTU::withCode(Case.Code); + auto AST = TU.build(); + auto &Ctx = AST.getASTContext(); + const auto &Decl = findDecl(AST, "foo"); + + SymbolDocumentationOwned ExpectedDoc; + ExpectedDoc.CommentText = + getCompletionComment(Ctx, &Decl) + ->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics()); + Case.ExpectedBuilder(ExpectedDoc); + + const RawComment *RC = getCompletionComment(Ctx, &Decl); + EXPECT_THAT(RC, testing::NotNull()); + 
+    EXPECT_THAT(parseDoxygenComment(*RC, Ctx, &Decl), matchesDoc(ExpectedDoc));
+  }
+}
+
 TEST_F(CompletionStringTest, MultipleAnnotations) {
   Builder.AddAnnotation("Ano1");
   Builder.AddAnnotation("Ano2");
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h b/clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationMatchers.h
@@ -0,0 +1,55 @@
+//===-- SymbolDocumentationMatchers.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// GMock matchers for the SymbolDocumentation class
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_MATCHERS_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SYMBOLDOCUMENTATION_MATCHERS_H
+#include "SymbolDocumentation.h"
+#include "gmock/gmock.h"
+#include <vector>
+
+namespace clang {
+namespace clangd {
+
+/// Returns a matcher that accepts a SymbolDocumentation whose Brief, Returns,
+/// Notes, Warnings, Parameters, Description and CommentText all equal the
+/// corresponding fields of \p Expected.
+template <class S>
+testing::Matcher<SymbolDocumentation<S>>
+matchesDoc(const SymbolDocumentation<S> &Expected) {
+  using namespace ::testing;
+
+  std::vector<Matcher<ParameterDocumentation<S>>> ParamMatchers;
+  for (const auto &P : Expected.Parameters)
+    ParamMatchers.push_back(
+        AllOf(Field("Name", &ParameterDocumentation<S>::Name, P.Name),
+              Field("Description", &ParameterDocumentation<S>::Description,
+                    P.Description)));
+
+  return AllOf(
+      Field("Brief", &SymbolDocumentation<S>::Brief, Expected.Brief),
+      Field("Returns", &SymbolDocumentation<S>::Returns, Expected.Returns),
+      Field("Notes", &SymbolDocumentation<S>::Notes,
+            ElementsAreArray(Expected.Notes)),
+      Field("Warnings", &SymbolDocumentation<S>::Warnings,
+            ElementsAreArray(Expected.Warnings)),
+      Field("Parameters", &SymbolDocumentation<S>::Parameters,
+            ElementsAreArray(ParamMatchers)),
+      Field("Description", &SymbolDocumentation<S>::Description,
+            Expected.Description),
+      Field("CommentText", &SymbolDocumentation<S>::CommentText,
+            Expected.CommentText));
+}
+
+} // namespace clangd
+} // namespace clang
+
+#endif
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
--- a/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clangd/BUILD.gn
@@ -117,6 +117,7 @@
     "SemanticHighlighting.cpp",
     "SemanticSelection.cpp",
     "SourceCode.cpp",
+    "SymbolDocumentation.cpp",
     "TUScheduler.cpp",
     "TidyProvider.cpp",
     "URI.cpp",