diff --git a/clang/include/clang/AST/RawCommentList.h b/clang/include/clang/AST/RawCommentList.h
--- a/clang/include/clang/AST/RawCommentList.h
+++ b/clang/include/clang/AST/RawCommentList.h
@@ -139,6 +139,26 @@
   std::string getFormattedText(const SourceManager &SourceMgr,
                                DiagnosticsEngine &Diags) const;
 
+  /// A single line of sanitized comment text together with the presumed
+  /// source locations at which that line begins and ends.
+  struct CommentLine {
+    /// Text of the line.
+    std::string Text;
+    /// Presumed location of the start of the line.
+    PresumedLoc Begin;
+    /// Presumed location of the end of the line.
+    PresumedLoc End;
+
+    CommentLine(StringRef Text, PresumedLoc Begin, PresumedLoc End)
+        : Text(Text), Begin(Begin), End(End) {}
+  };
+
+  /// Returns sanitized comment text as separated lines with locations in
+  /// source, suitable for further processing and rendering requiring source
+  /// locations.
+  std::vector<CommentLine> getFormattedLines(const SourceManager &SourceMgr,
+                                             DiagnosticsEngine &Diags) const;
+
   /// Parse the comment, assuming it is attached to decl \c D.
   comments::FullComment *parse(const ASTContext &Context,
                                const Preprocessor *PP, const Decl *D) const;
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -271,12 +271,6 @@
   bool usesPreprocessorOnly() const override { return true; }
 };
 
-class ExtractAPIAction : public ASTFrontendAction {
-protected:
-  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
-                                                 StringRef InFile) override;
-};
-
 //===----------------------------------------------------------------------===//
 // Preprocessor Actions
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/SymbolGraph/API.h b/clang/include/clang/SymbolGraph/API.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/SymbolGraph/API.h
@@ -0,0 +1,138 @@
+//===- SymbolGraph/API.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Defines SymbolGraph API records.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SYMBOLGRAPH_API_H
+#define LLVM_CLANG_SYMBOLGRAPH_API_H
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/RawCommentList.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/SymbolGraph/AvailabilityInfo.h"
+#include "clang/SymbolGraph/DeclarationFragments.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
+#include <vector>
+
+namespace clang {
+namespace symbolgraph {
+
+/// Documentation comment of a record, kept as individual lines with their
+/// source locations.
+using DocComment = std::vector<RawComment::CommentLine>;
+
+/// Common data stored for every API record. Abstract: concrete records derive
+/// from it and are told apart through \c getKind() / \c classof.
+struct APIRecord {
+  StringRef Name;
+  StringRef USR;
+  PresumedLoc Location;
+  AvailabilityInfo Availability;
+  LinkageInfo Linkage;
+  DocComment Comment;
+  DeclarationFragments Declaration;
+  DeclarationFragments SubHeading;
+
+  /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
+  enum RecordKind {
+    RK_Global,
+  };
+
+private:
+  const RecordKind Kind;
+
+public:
+  RecordKind getKind() const { return Kind; }
+
+  APIRecord() = delete;
+
+  APIRecord(RecordKind Kind, StringRef Name, StringRef USR,
+            PresumedLoc Location, const AvailabilityInfo &Availability,
+            LinkageInfo Linkage, const DocComment &Comment,
+            DeclarationFragments Declaration, DeclarationFragments SubHeading)
+      : Name(Name), USR(USR), Location(Location), Availability(Availability),
+        Linkage(Linkage), Comment(Comment), Declaration(Declaration),
+        SubHeading(SubHeading), Kind(Kind) {}
+
+  // Pure virtual destructor to make APIRecord abstract
+  virtual ~APIRecord() = 0;
+};
+
+/// The kind of a global record.
+enum class GVKind : uint8_t {
+  Unknown = 0,
+  Variable = 1,
+  Function = 2,
+};
+
+/// Record for a global variable or function; \c Signature is left empty by
+/// \c API::addGlobalVar.
+struct GlobalRecord : APIRecord {
+  GVKind GlobalKind;
+  FunctionSignature Signature;
+
+  GlobalRecord(GVKind Kind, StringRef Name, StringRef USR, PresumedLoc Loc,
+               const AvailabilityInfo &Availability, LinkageInfo Linkage,
+               const DocComment &Comment, DeclarationFragments Declaration,
+               DeclarationFragments SubHeading, FunctionSignature Signature)
+      : APIRecord(RK_Global, Name, USR, Loc, Availability, Linkage, Comment,
+                  Declaration, SubHeading),
+        GlobalKind(Kind), Signature(Signature) {}
+
+  static bool classof(const APIRecord *Record) {
+    return Record->getKind() == RK_Global;
+  }
+};
+
+/// Owns the set of API records collected for one target and language
+/// configuration.
+class API {
+public:
+  API(const llvm::Triple &Target, const LangOptions &LangOpts)
+      : Target(Target), LangOpts(LangOpts) {}
+
+  /// Keep the class move-constructible now that a destructor is declared.
+  API(API &&) = default;
+
+  /// Records are placement-new'ed into \c Allocator, which releases its
+  /// memory without running destructors. Destroy them explicitly so the
+  /// heap data owned by their std::string / std::vector members is freed.
+  ~API() {
+    for (auto &Entry : Globals)
+      if (Entry.second)
+        Entry.second->~GlobalRecord();
+  }
+
+  const llvm::Triple &getTarget() const { return Target; }
+  const LangOptions &getLangOpts() const { return LangOpts; }
+
+  /// Adds a global record keyed by \p Name and returns it; if a record with
+  /// that name already exists, the existing record is returned instead.
+  GlobalRecord *addGlobal(GVKind Kind, StringRef Name, StringRef USR,
+                          PresumedLoc Loc, const AvailabilityInfo &Availability,
+                          LinkageInfo Linkage, const DocComment &Comment,
+                          DeclarationFragments Declaration,
+                          DeclarationFragments SubHeading,
+                          FunctionSignature Signature);
+
+  /// Shorthand for \c addGlobal with \c GVKind::Variable and no signature.
+  GlobalRecord *addGlobalVar(StringRef Name, StringRef USR, PresumedLoc Loc,
+                             const AvailabilityInfo &Availability,
+                             LinkageInfo Linkage, const DocComment &Comment,
+                             DeclarationFragments Declaration,
+                             DeclarationFragments SubHeading);
+
+  /// Shorthand for \c addGlobal with \c GVKind::Function.
+  GlobalRecord *addFunction(StringRef Name, StringRef USR, PresumedLoc Loc,
+                            const AvailabilityInfo &Availability,
+                            LinkageInfo Linkage, const DocComment &Comment,
+                            DeclarationFragments Declaration,
+                            DeclarationFragments SubHeading,
+                            FunctionSignature Signature);
+
+  /// Generates the USR of \p D and returns a copy stored in this API's
+  /// allocator.
+  StringRef recordUSR(const Decl *D);
+  /// Copies \p String into \p Allocator unless it already lives there.
+  StringRef copyString(StringRef String, llvm::BumpPtrAllocator &Allocator);
+  /// Copies \p String into this API's own allocator.
+  StringRef copyString(StringRef String);
+
+  using GlobalRecordMap = llvm::MapVector<StringRef, GlobalRecord *>;
+
+  const GlobalRecordMap &getGlobals() const { return Globals; }
+
+private:
+  llvm::BumpPtrAllocator Allocator;
+  const llvm::Triple Target;
+  const LangOptions LangOpts;
+
+  GlobalRecordMap Globals;
+};
+
+} // namespace symbolgraph
+} // namespace clang
+
+#endif // LLVM_CLANG_SYMBOLGRAPH_API_H
diff --git a/clang/include/clang/SymbolGraph/AvailabilityInfo.h b/clang/include/clang/SymbolGraph/AvailabilityInfo.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/SymbolGraph/AvailabilityInfo.h
@@ -0,0 +1,66 @@
+//===- SymbolGraph/AvailabilityInfo.h - Availability Info -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Defines the Availability Info for a declaration.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SYMBOLGRAPH_AVAILABILITY_INFO_H
+#define LLVM_CLANG_SYMBOLGRAPH_AVAILABILITY_INFO_H
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/Support/raw_ostream.h"
+#include <tuple>
+
+using llvm::VersionTuple;
+
+namespace clang {
+namespace symbolgraph {
+
+/// Availability information of a declaration: three version tuples and three
+/// boolean flags, compared member-wise by \c operator==.
+struct AvailabilityInfo {
+  VersionTuple Introduced;
+  VersionTuple Deprecated;
+  VersionTuple Obsoleted;
+  bool Unavailable{false};
+  bool UnconditionallyDeprecated{false};
+  bool UnconditionallyUnavailable{false};
+
+  explicit AvailabilityInfo(bool Unavailable = false)
+      : Unavailable(Unavailable) {}
+
+  AvailabilityInfo(VersionTuple I, VersionTuple D, VersionTuple O, bool U,
+                   bool UD, bool UU)
+      : Introduced(I), Deprecated(D), Obsoleted(O), Unavailable(U),
+        UnconditionallyDeprecated(UD), UnconditionallyUnavailable(UU) {}
+
+  /// True when every member equals that of a default-constructed object.
+  bool isDefault() const { return *this == AvailabilityInfo(); }
+  bool isUnavailable() const { return Unavailable; }
+  bool isUnconditionallyDeprecated() const { return UnconditionallyDeprecated; }
+  bool isUnconditionallyUnavailable() const {
+    return UnconditionallyUnavailable;
+  }
+
+  friend bool operator==(const AvailabilityInfo &Lhs,
+                         const AvailabilityInfo &Rhs);
+};
+
+/// Member-wise equality over all six fields (std::tie needs <tuple>).
+inline bool operator==(const AvailabilityInfo &Lhs,
+                       const AvailabilityInfo &Rhs) {
+  return std::tie(Lhs.Introduced, Lhs.Deprecated, Lhs.Obsoleted,
+                  Lhs.Unavailable, Lhs.UnconditionallyDeprecated,
+                  Lhs.UnconditionallyUnavailable) ==
+         std::tie(Rhs.Introduced, Rhs.Deprecated, Rhs.Obsoleted,
+                  Rhs.Unavailable, Rhs.UnconditionallyDeprecated,
+                  Rhs.UnconditionallyUnavailable);
+}
+
+} // namespace symbolgraph
+} // namespace clang
+
+#endif // LLVM_CLANG_SYMBOLGRAPH_AVAILABILITY_INFO_H
diff --git a/clang/include/clang/SymbolGraph/DeclarationFragments.h b/clang/include/clang/SymbolGraph/DeclarationFragments.h
new file mode 100644
--- /dev/null
+++ 
b/clang/include/clang/SymbolGraph/DeclarationFragments.h @@ -0,0 +1,140 @@ +//===- SymbolGraph/DeclarationFragments.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines SymbolGraph Declaration Fragments related classes. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SYMBOLGRAPH_DECLARATION_FRAGMENTS_H +#define LLVM_CLANG_SYMBOLGRAPH_DECLARATION_FRAGMENTS_H + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace clang { +namespace symbolgraph { + +class DeclarationFragments { +public: + DeclarationFragments() = default; + + enum class FragmentKind { + None, + Keyword, + Attribute, + NumberLiteral, + StringLiteral, + Identifier, + TypeIdentifier, + GenericParameter, + ExternalParam, + InternalParam, + Text, + }; + + struct Fragment { + std::string Spelling; + FragmentKind Kind; + std::string PreciseIdentifier; + + Fragment(StringRef Spelling, FragmentKind Kind, StringRef PreciseIdentifier) + : Spelling(Spelling), Kind(Kind), PreciseIdentifier(PreciseIdentifier) { + } + }; + + const std::vector &getFragments() const { return Fragments; } + + DeclarationFragments &append(StringRef Spelling, FragmentKind Kind, + StringRef PreciseIdentifier = "") { + if (Kind == FragmentKind::Text && !Fragments.empty() && + Fragments.back().Kind == FragmentKind::Text) { + Fragments.back().Spelling.append(Spelling.data(), Spelling.size()); + } else { + Fragments.emplace_back(Spelling, Kind, PreciseIdentifier); + } + return *this; + } + + DeclarationFragments &append(DeclarationFragments &&Other) { + 
Fragments.insert(Fragments.end(), + std::make_move_iterator(Other.Fragments.begin()), + std::make_move_iterator(Other.Fragments.end())); + Other.Fragments.clear(); + return *this; + } + + DeclarationFragments &appendSpace(); + + static StringRef getFragmentKindString(FragmentKind Kind); + static FragmentKind parseFragmentKindFromString(StringRef S); + +private: + std::vector Fragments; +}; + +class FunctionSignature { +public: + FunctionSignature() = default; + + struct Parameter { + std::string Name; + DeclarationFragments Fragments; + + Parameter(StringRef Name, DeclarationFragments Fragments) + : Name(Name), Fragments(Fragments) {} + }; + + const std::vector &getParameters() const { return Parameters; } + const DeclarationFragments &getReturnType() const { return ReturnType; } + + FunctionSignature &addParameter(StringRef Name, + DeclarationFragments Fragments) { + Parameters.emplace_back(Name, Fragments); + return *this; + } + + void setReturnType(DeclarationFragments RT) { ReturnType = RT; } + + bool empty() const { + return Parameters.empty() && ReturnType.getFragments().empty(); + } + +private: + std::vector Parameters; + DeclarationFragments ReturnType; +}; + +class DeclarationFragmentsBuilder { +public: + static DeclarationFragments getFragmentsForVar(const VarDecl *); + static DeclarationFragments getFragmentsForFunction(const FunctionDecl *); + static DeclarationFragments getSubHeading(const NamedDecl *); + static FunctionSignature getFunctionSignature(const FunctionDecl *); + +private: + DeclarationFragmentsBuilder() = delete; + + static DeclarationFragments getFragmentsForType(const QualType, ASTContext &, + DeclarationFragments &); + static DeclarationFragments getFragmentsForType(const Type *, ASTContext &, + DeclarationFragments &); + static DeclarationFragments getFragmentsForNNS(const NestedNameSpecifier *, + ASTContext &, + DeclarationFragments &); + static DeclarationFragments getFragmentsForQualifiers(const Qualifiers quals); + static 
DeclarationFragments getFragmentsForParam(const ParmVarDecl *); +}; + +} // namespace symbolgraph +} // namespace clang + +#endif // LLVM_CLANG_SYMBOLGRAPH_DECLARATION_FRAGMENTS_H diff --git a/clang/include/clang/SymbolGraph/FrontendActions.h b/clang/include/clang/SymbolGraph/FrontendActions.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/SymbolGraph/FrontendActions.h @@ -0,0 +1,33 @@ +//===- SymbolGraph/FrontendActions.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines SymbolGraph frontend actions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SYMBOLGRAPH_FRONTEND_ACTIONS_H +#define LLVM_CLANG_SYMBOLGRAPH_FRONTEND_ACTIONS_H + +#include "clang/Frontend/FrontendAction.h" + +namespace clang { + +class ExtractAPIAction : public ASTFrontendAction { +protected: + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) override; + +public: + static std::unique_ptr + CreateOutputFile(CompilerInstance &CI, StringRef InFile); +}; + +} // namespace clang + +#endif // LLVM_CLANG_SYMBOLGRAPH_FRONTEND_ACTIONS_H diff --git a/clang/include/clang/SymbolGraph/Serialization.h b/clang/include/clang/SymbolGraph/Serialization.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/SymbolGraph/Serialization.h @@ -0,0 +1,58 @@ +//===- SymbolGraph/Serialization.h ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the SymbolGraph serializer and parser. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SYMBOLGRAPH_SERIALIZATION_H +#define LLVM_CLANG_SYMBOLGRAPH_SERIALIZATION_H + +#include "clang/SymbolGraph/API.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/VersionTuple.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace symbolgraph { + +using namespace llvm::json; + +struct SerializerOption { + bool Compact; +}; + +class Serializer { +public: + Serializer(const API &API, SerializerOption Options = {}) + : API(API), Options(Options) {} + + Object serialize(); + void serialize(raw_ostream &os); + +private: + Object serializeMetadata() const; + Object serializeModule() const; + Optional serializeAPIRecord(const APIRecord &Record) const; + void serializeGlobalRecord(const GlobalRecord &Record); + + bool shouldSkip(const APIRecord &Record) const; + + const API &API; + SerializerOption Options; + Array Symbols; + Array Relationships; + + static const VersionTuple FormatVersion; +}; + +} // namespace symbolgraph +} // namespace clang + +#endif // LLVM_CLANG_SYMBOLGRAPH_SERIALIZATION_H diff --git a/clang/lib/AST/RawCommentList.cpp b/clang/lib/AST/RawCommentList.cpp --- a/clang/lib/AST/RawCommentList.cpp +++ b/clang/lib/AST/RawCommentList.cpp @@ -16,6 +16,7 @@ #include "clang/AST/CommentSema.h" #include "clang/Basic/CharInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Allocator.h" using namespace clang; @@ -362,6 +363,24 @@ if (CommentText.empty()) return ""; + std::string Result; + for (const RawComment::CommentLine &Line : + getFormattedLines(SourceMgr, Diags)) + Result += Line.Text + "\n"; + + auto LastChar = Result.find_last_not_of('\n'); + 
Result.erase(LastChar + 1, Result.size()); + + return Result; +} + +std::vector +RawComment::getFormattedLines(const SourceManager &SourceMgr, + DiagnosticsEngine &Diags) const { + llvm::StringRef CommentText = getRawText(SourceMgr); + if (CommentText.empty()) + return {}; + llvm::BumpPtrAllocator Allocator; // We do not parse any commands, so CommentOptions are ignored by // comments::Lexer. Therefore, we just use default-constructed options. @@ -371,13 +390,23 @@ CommentText.begin(), CommentText.end(), /*ParseCommands=*/false); - std::string Result; + std::vector Result; // A column number of the first non-whitespace token in the comment text. // We skip whitespace up to this column, but keep the whitespace after this // column. IndentColumn is calculated when lexing the first line and reused // for the rest of lines. unsigned IndentColumn = 0; + // Record the line number of the last processed comment line. + // For block-style comments, an extra newline token will be produced after + // the end-comment marker, e.g.: + // /** This is a multi-line comment block. + // The lexer will produce two newline tokens here > */ + // previousLine will record the line number when we previously saw a newline + // token and recorded a comment line. If we see another newline token on the + // same line, don't record anything in between. + unsigned PreviousLine = 0; + // Processes one line of the comment and adds it to the result. // Handles skipping the indent at the start of the line. // Returns false when eof is reached and true otherwise. 
@@ -389,9 +418,14 @@ if (Tok.is(comments::tok::eof)) return false; if (Tok.is(comments::tok::newline)) { - Result += "\n"; + PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation()); + if (Loc.getLine() != PreviousLine) { + Result.emplace_back("", Loc, Loc); + PreviousLine = Loc.getLine(); + } return true; } + SmallString<124> Line; llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr); bool LocInvalid = false; unsigned TokColumn = @@ -417,32 +451,35 @@ WhitespaceLen, std::max(static_cast(IndentColumn) - TokColumn, 0)); llvm::StringRef Trimmed = TokText.drop_front(SkipLen); - Result += Trimmed; + Line += Trimmed; + // Get the beginning location of the adjusted comment line. + PresumedLoc Begin = + SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen)); + // Lex all tokens in the rest of the line. for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) { if (Tok.is(comments::tok::newline)) { - Result += "\n"; + // Get the ending location of the comment line. + PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation()); + if (End.getLine() != PreviousLine) { + Result.emplace_back(Line, Begin, End); + PreviousLine = End.getLine(); + } return true; } - Result += L.getSpelling(Tok, SourceMgr); + Line += L.getSpelling(Tok, SourceMgr); } + PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation()); + Result.emplace_back(Line, Begin, End); // We've reached the end of file token. return false; }; - auto DropTrailingNewLines = [](std::string &Str) { - while (!Str.empty() && Str.back() == '\n') - Str.pop_back(); - }; - // Process first line separately to remember indent for the following lines. - if (!LexLine(/*IsFirstLine=*/true)) { - DropTrailingNewLines(Result); + if (!LexLine(/*IsFirstLine=*/true)) return Result; - } // Process the rest of the lines. 
while (LexLine(/*IsFirstLine=*/false)) ; - DropTrailingNewLines(Result); return Result; } diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -23,6 +23,7 @@ add_subdirectory(Index) add_subdirectory(IndexSerialization) add_subdirectory(StaticAnalyzer) +add_subdirectory(SymbolGraph) add_subdirectory(Format) add_subdirectory(Testing) add_subdirectory(Interpreter) diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -20,7 +20,6 @@ DependencyFile.cpp DependencyGraph.cpp DiagnosticRenderer.cpp - ExtractAPIConsumer.cpp FrontendAction.cpp FrontendActions.cpp FrontendOptions.cpp diff --git a/clang/lib/Frontend/ExtractAPIConsumer.cpp b/clang/lib/Frontend/ExtractAPIConsumer.cpp deleted file mode 100644 --- a/clang/lib/Frontend/ExtractAPIConsumer.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/RecursiveASTVisitor.h" -#include "clang/Frontend/ASTConsumers.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendActions.h" - -using namespace clang; - -namespace { -class ExtractAPIVisitor : public RecursiveASTVisitor { -public: - bool VisitNamedDecl(NamedDecl *Decl) { - llvm::outs() << Decl->getName() << "\n"; - return true; - } -}; - -class ExtractAPIConsumer : public ASTConsumer { -public: - void HandleTranslationUnit(ASTContext &Context) override { - Visitor.TraverseDecl(Context.getTranslationUnitDecl()); - } - -private: - ExtractAPIVisitor Visitor; -}; -} // namespace - -std::unique_ptr -ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { - return std::make_unique(); -} diff --git a/clang/lib/FrontendTool/CMakeLists.txt b/clang/lib/FrontendTool/CMakeLists.txt --- a/clang/lib/FrontendTool/CMakeLists.txt +++ b/clang/lib/FrontendTool/CMakeLists.txt @@ -9,6 +9,7 @@ clangDriver clangFrontend 
clangRewriteFrontend + clangSymbolGraph ) if(CLANG_ENABLE_ARCMT) diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -25,6 +25,7 @@ #include "clang/Rewrite/Frontend/FrontendActions.h" #include "clang/StaticAnalyzer/Frontend/AnalyzerHelpFlags.h" #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" +#include "clang/SymbolGraph/FrontendActions.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Support/BuryPointer.h" diff --git a/clang/lib/SymbolGraph/API.cpp b/clang/lib/SymbolGraph/API.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/SymbolGraph/API.cpp @@ -0,0 +1,83 @@ +//===- SymbolGraph/API.cpp --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines SymbolGraph API records. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/SymbolGraph/API.h"
+#include "clang/AST/CommentCommandTraits.h"
+#include "clang/AST/CommentLexer.h"
+#include "clang/AST/RawCommentList.h"
+#include "clang/Index/USRGeneration.h"
+#include "llvm/Support/Allocator.h"
+#include <cstring>
+
+namespace clang {
+namespace symbolgraph {
+
+// Out-of-line definition required for the pure virtual destructor.
+APIRecord::~APIRecord() = default;
+
+/// Adds a global record keyed by \p Name, or returns the record already
+/// stored under that name.
+GlobalRecord *
+API::addGlobal(GVKind Kind, StringRef Name, StringRef USR, PresumedLoc Loc,
+               const AvailabilityInfo &Availability, LinkageInfo Linkage,
+               const DocComment &Comment, DeclarationFragments Fragments,
+               DeclarationFragments SubHeading, FunctionSignature Signature) {
+  // Return the existing record if a global with this name was already added.
+  auto Existing = Globals.find(Name);
+  if (Existing != Globals.end())
+    return Existing->second;
+
+  // Intern the strings so that neither the map key nor the record refers to
+  // caller-owned memory. copyString returns its argument unchanged when the
+  // data already lives in Allocator (e.g. a USR produced by recordUSR).
+  StringRef OwnedName = copyString(Name);
+  StringRef OwnedUSR = copyString(USR);
+  GlobalRecord *Record = new (Allocator)
+      GlobalRecord{Kind,    OwnedName, OwnedUSR,  Loc,        Availability,
+                   Linkage, Comment,   Fragments, SubHeading, Signature};
+  Globals.insert({OwnedName, Record});
+  return Record;
+}
+
+/// Adds a global variable record; variables carry an empty signature.
+GlobalRecord *API::addGlobalVar(StringRef Name, StringRef USR, PresumedLoc Loc,
+                                const AvailabilityInfo &Availability,
+                                LinkageInfo Linkage, const DocComment &Comment,
+                                DeclarationFragments Fragments,
+                                DeclarationFragments SubHeading) {
+  return addGlobal(GVKind::Variable, Name, USR, Loc, Availability, Linkage,
+                   Comment, Fragments, SubHeading, {});
+}
+
+/// Adds a global function record with the given signature.
+GlobalRecord *API::addFunction(StringRef Name, StringRef USR, PresumedLoc Loc,
+                               const AvailabilityInfo &Availability,
+                               LinkageInfo Linkage, const DocComment &Comment,
+                               DeclarationFragments Fragments,
+                               DeclarationFragments SubHeading,
+                               FunctionSignature Signature) {
+  return addGlobal(GVKind::Function, Name, USR, Loc, Availability, Linkage,
+                   Comment, Fragments, SubHeading, Signature);
+}
+
+/// Generates the USR for \p D and returns a copy owned by this API's
+/// allocator. The result of generateUSRForDecl is not checked here.
+StringRef API::recordUSR(const Decl *D) {
+  SmallString<128> USR;
+  index::generateUSRForDecl(D, USR);
+  return copyString(USR);
+}
+
+/// Copies \p String into \p Allocator and returns a reference to the copy.
+/// An empty string yields an empty StringRef, and a string whose data already
+/// lives in \p Allocator is returned unchanged.
+StringRef API::copyString(StringRef String, llvm::BumpPtrAllocator &Allocator) {
+  if (String.empty())
+    return {};
+
+  if (Allocator.identifyObject(String.data()))
+    return String;
+
+  void *Ptr = Allocator.Allocate(String.size(), 1);
+  memcpy(Ptr, String.data(), String.size());
+  return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+}
+
+/// Copies \p String into this API's own allocator.
+StringRef API::copyString(StringRef String) {
+  return copyString(String, Allocator);
+}
+
+} // namespace symbolgraph
+} // namespace clang
diff --git a/clang/lib/SymbolGraph/CMakeLists.txt b/clang/lib/SymbolGraph/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang/lib/SymbolGraph/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_library(clangSymbolGraph
+  API.cpp
+  ExtractAPIConsumer.cpp
+  DeclarationFragments.cpp
+  Serialization.cpp
+
+  LINK_LIBS
+  clangAST
+  clangBasic
+  clangFrontend
+  clangIndex
+  )
diff --git a/clang/lib/SymbolGraph/DeclarationFragments.cpp b/clang/lib/SymbolGraph/DeclarationFragments.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/SymbolGraph/DeclarationFragments.cpp
@@ -0,0 +1,434 @@
+//===- SymbolGraph/DeclarationFragments.cpp ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Defines SymbolGraph Declaration Fragments related classes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/SymbolGraph/DeclarationFragments.h"
+#include "clang/Index/USRGeneration.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace clang {
+namespace symbolgraph {
+
+/// Ensures the fragment list ends with a single space, unless it is empty.
+/// A trailing text fragment gets the space merged into its spelling (only if
+/// it does not already end with one); otherwise a new " " text fragment is
+/// appended.
+DeclarationFragments &DeclarationFragments::appendSpace() {
+  if (Fragments.empty())
+    return *this;
+
+  // Bind by reference: editing a copy would silently drop the space.
+  Fragment &Last = Fragments.back();
+  if (Last.Kind != FragmentKind::Text) {
+    append(" ", FragmentKind::Text);
+  } else if (Last.Spelling.empty() || Last.Spelling.back() != ' ') {
+    // Guard the empty spelling: calling back() on an empty string is undefined.
+    Last.Spelling.push_back(' ');
+  }
+
+  return *this;
+}
+
+/// Returns the string spelling of \p Kind; the inverse of
+/// parseFragmentKindFromString for every kind except None.
+StringRef DeclarationFragments::getFragmentKindString(
+    DeclarationFragments::FragmentKind Kind) {
+  switch (Kind) {
+  case DeclarationFragments::FragmentKind::None:
+    return "none";
+  case DeclarationFragments::FragmentKind::Keyword:
+    return "keyword";
+  case DeclarationFragments::FragmentKind::Attribute:
+    return "attribute";
+  case DeclarationFragments::FragmentKind::NumberLiteral:
+    return "number";
+  case DeclarationFragments::FragmentKind::StringLiteral:
+    return "string";
+  case DeclarationFragments::FragmentKind::Identifier:
+    return "identifier";
+  case DeclarationFragments::FragmentKind::TypeIdentifier:
+    return "typeIdentifier";
+  case DeclarationFragments::FragmentKind::GenericParameter:
+    return "genericParameter";
+  case DeclarationFragments::FragmentKind::ExternalParam:
+    return "externalParam";
+  case DeclarationFragments::FragmentKind::InternalParam:
+    return "internalParam";
+  case DeclarationFragments::FragmentKind::Text:
+    return "text";
+  }
+
+  llvm_unreachable("Unhandled FragmentKind");
+}
+
+DeclarationFragments::FragmentKind
+DeclarationFragments::parseFragmentKindFromString(StringRef S) {
+  return llvm::StringSwitch<FragmentKind>(S)
+      .Case("keyword", DeclarationFragments::FragmentKind::Keyword)
+      .Case("attribute", DeclarationFragments::FragmentKind::Attribute)
+      .Case("number", DeclarationFragments::FragmentKind::NumberLiteral)
+      .Case("string",
DeclarationFragments::FragmentKind::StringLiteral) + .Case("identifier", DeclarationFragments::FragmentKind::Identifier) + .Case("typeIdentifier", + DeclarationFragments::FragmentKind::TypeIdentifier) + .Case("genericParameter", + DeclarationFragments::FragmentKind::GenericParameter) + .Case("internalParam", DeclarationFragments::FragmentKind::InternalParam) + .Case("externalParam", DeclarationFragments::FragmentKind::ExternalParam) + .Case("text", DeclarationFragments::FragmentKind::Text) + .Default(DeclarationFragments::FragmentKind::None); +} + +// NNS stores C++ nested name specifiers, which are prefixes to qualified names. +// Build declaration fragments for NNS recursively so that we have the USR for +// every part in a qualified name, and also leaves the actual underlying type +// cleaner for its own fragment. +DeclarationFragments +DeclarationFragmentsBuilder::getFragmentsForNNS(const NestedNameSpecifier *NNS, + ASTContext &Context, + DeclarationFragments &After) { + DeclarationFragments Fragments; + if (NNS->getPrefix()) + Fragments.append(getFragmentsForNNS(NNS->getPrefix(), Context, After)); + + switch (NNS->getKind()) { + case NestedNameSpecifier::Identifier: + Fragments.append(NNS->getAsIdentifier()->getName(), + DeclarationFragments::FragmentKind::Identifier); + break; + + case NestedNameSpecifier::Namespace: { + const NamespaceDecl *NS = NNS->getAsNamespace(); + if (NS->isAnonymousNamespace()) + return Fragments; + SmallString<128> USR; + index::generateUSRForDecl(NS, USR); + Fragments.append(NS->getName(), + DeclarationFragments::FragmentKind::Identifier, USR); + break; + } + + case NestedNameSpecifier::NamespaceAlias: { + const NamespaceAliasDecl *Alias = NNS->getAsNamespaceAlias(); + SmallString<128> USR; + index::generateUSRForDecl(Alias, USR); + Fragments.append(Alias->getName(), + DeclarationFragments::FragmentKind::Identifier, USR); + break; + } + + case NestedNameSpecifier::Global: + // The global specifier `::` at the beginning. 
No stored value. + break; + + case NestedNameSpecifier::Super: + // Microsoft's `__super` specifier. + Fragments.append("__super", DeclarationFragments::FragmentKind::Keyword); + break; + + case NestedNameSpecifier::TypeSpecWithTemplate: + // A type prefixed by the `template` keyword. + Fragments.append("template", DeclarationFragments::FragmentKind::Keyword); + Fragments.appendSpace(); + // Fallthrough after adding the keyword to handle the actual type. + LLVM_FALLTHROUGH; + + case NestedNameSpecifier::TypeSpec: { + const Type *T = NNS->getAsType(); + // FIXME: Handle C++ template specialization type + Fragments.append(getFragmentsForType(T, Context, After)); + break; + } + } + + // Add the separator text `::` for this segment. + return Fragments.append("::", DeclarationFragments::FragmentKind::Text); +} + +// Recursively build the declaration fragments for an underlying `Type` with +// qualifiers removed. +DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType( + const Type *T, ASTContext &Context, DeclarationFragments &After) { + assert(T && "invalid type"); + + DeclarationFragments Fragments; + + // Declaration fragments of a pointer type is the declaration fragments of + // the pointee type followed by a `*`, except for Objective-C `id` and `Class` + // pointers, where we do not spell out the `*`. + if (T->isPointerType() || + (T->isObjCObjectPointerType() && + !T->getAs()->isObjCIdOrClassType())) { + return Fragments + .append(getFragmentsForType(T->getPointeeType(), Context, After)) + .append(" *", DeclarationFragments::FragmentKind::Text); + } + + // Declaration fragments of a lvalue reference type is the declaration + // fragments of the underlying type followed by a `&`. 
+ if (const LValueReferenceType *LRT = dyn_cast(T)) + return Fragments + .append( + getFragmentsForType(LRT->getPointeeTypeAsWritten(), Context, After)) + .append(" &", DeclarationFragments::FragmentKind::Text); + + // Declaration fragments of a rvalue reference type is the declaration + // fragments of the underlying type followed by a `&&`. + if (const RValueReferenceType *RRT = dyn_cast(T)) + return Fragments + .append( + getFragmentsForType(RRT->getPointeeTypeAsWritten(), Context, After)) + .append(" &&", DeclarationFragments::FragmentKind::Text); + + // Declaration fragments of an array-typed variable have two parts: + // 1. the element type of the array that appears before the variable name; + // 2. array brackets `[(0-9)?]` that appear after the variable name. + if (const ArrayType *AT = T->getAsArrayTypeUnsafe()) { + // Build the "after" part first because the inner element type might also + // be an array-type. For example `int matrix[3][4]` which has a type of + // "(array 3 of (array 4 of ints))." + // Push the array size part first to make sure they are in the right order. + After.append("[", DeclarationFragments::FragmentKind::Text); + + switch (AT->getSizeModifier()) { + case ArrayType::Normal: + break; + case ArrayType::Static: + Fragments.append("static", DeclarationFragments::FragmentKind::Keyword); + break; + case ArrayType::Star: + Fragments.append("*", DeclarationFragments::FragmentKind::Text); + break; + } + + if (const ConstantArrayType *CAT = dyn_cast(AT)) { + // FIXME: right now this would evaluate any expressions/macros written in + // the original source to concrete values. 
For example + // `int nums[MAX]` -> `int nums[100]` + // `char *str[5 + 1]` -> `char *str[6]` + SmallString<128> Size; + CAT->getSize().toStringUnsigned(Size); + After.append(Size, DeclarationFragments::FragmentKind::NumberLiteral); + } + + After.append("]", DeclarationFragments::FragmentKind::Text); + + return Fragments.append( + getFragmentsForType(AT->getElementType(), Context, After)); + } + + // An ElaboratedType is a sugar for types that are referred to using an + // elaborated keyword, e.g., `struct S`, `enum E`, or (in C++) via a + // qualified name, e.g., `N::M::type`, or both. + if (const ElaboratedType *ET = dyn_cast(T)) { + ElaboratedTypeKeyword Keyword = ET->getKeyword(); + if (Keyword != ETK_None) { + Fragments + .append(ElaboratedType::getKeywordName(Keyword), + DeclarationFragments::FragmentKind::Keyword) + .appendSpace(); + } + + if (const NestedNameSpecifier *NNS = ET->getQualifier()) + Fragments.append(getFragmentsForNNS(NNS, Context, After)); + + // After handling the elaborated keyword or qualified name, build + // declaration fragments for the desugared underlying type. + return Fragments.append(getFragmentsForType(ET->desugar(), Context, After)); + } + + // Everything we care about has been handled now, reduce to the canonical + // unqualified base type. + QualType Base = T->getCanonicalTypeUnqualified(); + + // Default fragment builder for other kinds of types (BuiltinType etc.) 
+ SmallString<128> USR; + clang::index::generateUSRForType(Base, Context, USR); + Fragments.append(Base.getAsString(), + DeclarationFragments::FragmentKind::TypeIdentifier, USR); + + return Fragments; +} + +DeclarationFragments +DeclarationFragmentsBuilder::getFragmentsForQualifiers(const Qualifiers Quals) { + DeclarationFragments Fragments; + if (Quals.hasConst()) + Fragments.append("const", DeclarationFragments::FragmentKind::Keyword); + if (Quals.hasVolatile()) + Fragments.append("volatile", DeclarationFragments::FragmentKind::Keyword); + if (Quals.hasRestrict()) + Fragments.append("restrict", DeclarationFragments::FragmentKind::Keyword); + + return Fragments; +} + +DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType( + const QualType QT, ASTContext &Context, DeclarationFragments &After) { + assert(!QT.isNull() && "invalid type"); + + if (const ParenType *PT = dyn_cast(QT)) { + After.append(")", DeclarationFragments::FragmentKind::Text); + return getFragmentsForType(PT->getInnerType(), Context, After) + .append("(", DeclarationFragments::FragmentKind::Text); + } + + const SplitQualType SQT = QT.split(); + DeclarationFragments QualsFragments = getFragmentsForQualifiers(SQT.Quals), + TypeFragments = + getFragmentsForType(SQT.Ty, Context, After); + if (QualsFragments.getFragments().empty()) + return TypeFragments; + + // Use east qualifier for pointer types + // For example: + // ``` + // int * const + // ^---- ^---- + // type qualifier + // ^----------------- + // const pointer to int + // ``` + // should not be reconstructed as + // ``` + // const int * + // ^---- ^-- + // qualifier type + // ^---------------- ^ + // pointer to const int + // ``` + if (SQT.Ty->isAnyPointerType()) + return TypeFragments.appendSpace().append(std::move(QualsFragments)); + + return QualsFragments.appendSpace().append(std::move(TypeFragments)); +} + +DeclarationFragments +DeclarationFragmentsBuilder::getFragmentsForVar(const VarDecl *Var) { + DeclarationFragments 
Fragments; + StorageClass SC = Var->getStorageClass(); + if (SC != SC_None) + Fragments + .append(VarDecl::getStorageClassSpecifierString(SC), + DeclarationFragments::FragmentKind::Keyword) + .appendSpace(); + QualType T = + Var->getTypeSourceInfo() + ? Var->getTypeSourceInfo()->getType() + : Var->getASTContext().getUnqualifiedObjCPointerType(Var->getType()); + + // Capture potential fragments that needs to be placed after the variable name + // ``` + // int nums[5]; + // char (*ptr_to_array)[6]; + // ``` + DeclarationFragments After; + return Fragments.append(getFragmentsForType(T, Var->getASTContext(), After)) + .appendSpace() + .append(Var->getName(), DeclarationFragments::FragmentKind::Identifier) + .append(std::move(After)); +} + +DeclarationFragments +DeclarationFragmentsBuilder::getFragmentsForParam(const ParmVarDecl *Param) { + DeclarationFragments Fragments, After; + + QualType T = Param->getTypeSourceInfo() + ? Param->getTypeSourceInfo()->getType() + : Param->getASTContext().getUnqualifiedObjCPointerType( + Param->getType()); + + DeclarationFragments TypeFragments = + getFragmentsForType(T, Param->getASTContext(), After); + + if (Param->isObjCMethodParameter()) + Fragments.append("(", DeclarationFragments::FragmentKind::Text) + .append(std::move(TypeFragments)) + .append(")", DeclarationFragments::FragmentKind::Text); + else + Fragments.append(std::move(TypeFragments)).appendSpace(); + + return Fragments + .append(Param->getName(), + DeclarationFragments::FragmentKind::InternalParam) + .append(std::move(After)); +} + +DeclarationFragments +DeclarationFragmentsBuilder::getFragmentsForFunction(const FunctionDecl *Func) { + DeclarationFragments Fragments; + // FIXME: Handle template specialization + switch (Func->getStorageClass()) { + case SC_None: + case SC_PrivateExtern: + break; + case SC_Extern: + Fragments.append("extern", DeclarationFragments::FragmentKind::Keyword) + .appendSpace(); + break; + case SC_Static: + Fragments.append("static", 
DeclarationFragments::FragmentKind::Keyword) + .appendSpace(); + break; + case SC_Auto: + case SC_Register: + llvm_unreachable("invalid for functions"); + } + // FIXME: Handle C++ function specifiers: constexpr, consteval, explicit, etc. + + // FIXME: Is `after` actually needed here? + DeclarationFragments After; + Fragments + .append(getFragmentsForType(Func->getReturnType(), Func->getASTContext(), + After)) + .appendSpace() + .append(Func->getName(), DeclarationFragments::FragmentKind::Identifier) + .append(std::move(After)); + + Fragments.append("(", DeclarationFragments::FragmentKind::Text); + for (unsigned i = 0, end = Func->getNumParams(); i != end; ++i) { + if (i) + Fragments.append(", ", DeclarationFragments::FragmentKind::Text); + Fragments.append(getFragmentsForParam(Func->getParamDecl(i))); + } + Fragments.append(")", DeclarationFragments::FragmentKind::Text); + + // FIXME: Handle exception specifiers: throw, noexcept + return Fragments; +} + +FunctionSignature +DeclarationFragmentsBuilder::getFunctionSignature(const FunctionDecl *Func) { + FunctionSignature Signature; + + for (const auto *Param : Func->parameters()) { + StringRef Name = Param->getName(); + DeclarationFragments Fragments = getFragmentsForParam(Param); + + Signature.addParameter(Name, Fragments); + } + + DeclarationFragments After; + DeclarationFragments Returns = + getFragmentsForType(Func->getReturnType(), Func->getASTContext(), After) + .append(std::move(After)); + + Signature.setReturnType(Returns); + + return Signature; +} + +// Subheading of a symbol defaults to its name. 
+DeclarationFragments +DeclarationFragmentsBuilder::getSubHeading(const NamedDecl *Decl) { + DeclarationFragments Fragments; + if (!Decl->getName().empty()) + Fragments.append(Decl->getName(), + DeclarationFragments::FragmentKind::Identifier); + return Fragments; +} + +} // namespace symbolgraph +} // namespace clang diff --git a/clang/lib/SymbolGraph/ExtractAPIConsumer.cpp b/clang/lib/SymbolGraph/ExtractAPIConsumer.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/SymbolGraph/ExtractAPIConsumer.cpp @@ -0,0 +1,205 @@ +//===- ExtractAPIConsumer.cpp -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the ExtractAPI AST visitor to collect API information. +/// +//===----------------------------------------------------------------------===// +// + +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/ParentMapContext.h" +#include "clang/AST/RawCommentList.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/SymbolGraph/API.h" +#include "clang/SymbolGraph/AvailabilityInfo.h" +#include "clang/SymbolGraph/DeclarationFragments.h" +#include "clang/SymbolGraph/FrontendActions.h" +#include "clang/SymbolGraph/Serialization.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace symbolgraph; + +namespace { +class ExtractAPIVisitor : public RecursiveASTVisitor { +public: + explicit ExtractAPIVisitor(ASTContext &Context) + : Context(Context), + API(Context.getTargetInfo().getTriple(), 
Context.getLangOpts()) {} + + const API &getAPI() const { return API; } + + bool VisitVarDecl(const VarDecl *Decl) { + // Skip function parameters. + if (isa(Decl)) + return true; + + // Skip non-global variables in records (struct/union/class). + if (Decl->getDeclContext()->isRecord()) + return true; + + // Skip local variables inside function or method. + if (!Decl->isDefinedOutsideFunctionOrMethod()) + return true; + + // If this is a template but not specialization or instantiation, skip. + if (Decl->getASTContext().getTemplateOrSpecializationInfo(Decl) && + Decl->getTemplateSpecializationKind() == TSK_Undeclared) + return true; + + StringRef Name = Decl->getName(); + StringRef USR = API.recordUSR(Decl); + PresumedLoc Loc = + Context.getSourceManager().getPresumedLoc(Decl->getLocation()); + AvailabilityInfo Availability = getAvailability(Decl); + LinkageInfo Linkage = Decl->getLinkageAndVisibility(); + DocComment Comment; + if (auto *RawComment = Context.getRawCommentForDeclNoCache(Decl)) + Comment = RawComment->getFormattedLines(Context.getSourceManager(), + Context.getDiagnostics()); + DeclarationFragments Declaration = + DeclarationFragmentsBuilder::getFragmentsForVar(Decl); + DeclarationFragments SubHeading = + DeclarationFragmentsBuilder::getSubHeading(Decl); + + API.addGlobalVar(Name, USR, Loc, Availability, Linkage, Comment, + Declaration, SubHeading); + return true; + } + + bool VisitFunctionDecl(const FunctionDecl *Decl) { + if (const auto *Method = dyn_cast(Decl)) { + // Skip member function in class templates. + if (Method->getParent()->getDescribedClassTemplate() != nullptr) + return true; + + // Skip methods in records. + for (auto P : Context.getParents(*Method)) { + if (P.get()) + return true; + } + + // Skip ConstructorDecl and DestructorDecl. + if (isa(Method) || isa(Method)) + return true; + } + + // Skip templated functions. 
+ switch (Decl->getTemplatedKind()) { + case FunctionDecl::TK_NonTemplate: + break; + case FunctionDecl::TK_MemberSpecialization: + case FunctionDecl::TK_FunctionTemplateSpecialization: + if (auto *TemplateInfo = Decl->getTemplateSpecializationInfo()) { + if (!TemplateInfo->isExplicitInstantiationOrSpecialization()) + return true; + } + break; + case FunctionDecl::TK_FunctionTemplate: + case FunctionDecl::TK_DependentFunctionTemplateSpecialization: + return true; + } + + StringRef Name = Decl->getName(); + StringRef USR = API.recordUSR(Decl); + PresumedLoc Loc = + Context.getSourceManager().getPresumedLoc(Decl->getLocation()); + AvailabilityInfo Availability = getAvailability(Decl); + LinkageInfo Linkage = Decl->getLinkageAndVisibility(); + DocComment Comment; + if (auto *RawComment = Context.getRawCommentForDeclNoCache(Decl)) + Comment = RawComment->getFormattedLines(Context.getSourceManager(), + Context.getDiagnostics()); + DeclarationFragments Declaration = + DeclarationFragmentsBuilder::getFragmentsForFunction(Decl); + DeclarationFragments SubHeading = + DeclarationFragmentsBuilder::getSubHeading(Decl); + FunctionSignature Signature = + DeclarationFragmentsBuilder::getFunctionSignature(Decl); + + API.addFunction(Name, USR, Loc, Availability, Linkage, Comment, Declaration, + SubHeading, Signature); + return true; + } + +private: + AvailabilityInfo getAvailability(const Decl *D) const { + StringRef PlatformName = Context.getTargetInfo().getPlatformName(); + + AvailabilityInfo Availability; + for (const auto *RD : D->redecls()) { + for (const auto *A : RD->specific_attrs()) { + if (A->getPlatform()->getName() != PlatformName) + continue; + Availability = AvailabilityInfo(A->getIntroduced(), A->getDeprecated(), + A->getObsoleted(), A->getUnavailable(), + /* UnconditionallyDeprecated */ false, + /* UnconditionallyUnavailable */ false); + break; + } + + if (const auto *A = RD->getAttr()) + if (!A->isImplicit()) { + Availability.Unavailable = true; + 
Availability.UnconditionallyUnavailable = true; + } + + if (const auto *A = RD->getAttr()) + if (!A->isImplicit()) + Availability.UnconditionallyDeprecated = true; + } + + return Availability; + } + + ASTContext &Context; + API API; +}; + +class ExtractAPIConsumer : public ASTConsumer { +public: + ExtractAPIConsumer(ASTContext &Context, std::unique_ptr OS) + : Visitor(Context), OS(std::move(OS)) {} + + void HandleTranslationUnit(ASTContext &Context) override { + Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + Serializer Serializer(Visitor.getAPI()); + Serializer.serialize(*OS); + } + +private: + ExtractAPIVisitor Visitor; + std::unique_ptr OS; +}; +} // namespace + +std::unique_ptr +ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { + std::unique_ptr OS = CreateOutputFile(CI, InFile); + if (!OS) + return nullptr; + return std::make_unique(CI.getASTContext(), + std::move(OS)); +} + +std::unique_ptr +ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { + std::unique_ptr OS = + CI.createDefaultOutputFile(/*Binary=*/false, InFile, /*Extension=*/"json", + /*RemoveFileOnSignal=*/false); + if (!OS) + return nullptr; + return OS; +} diff --git a/clang/lib/SymbolGraph/Serialization.cpp b/clang/lib/SymbolGraph/Serialization.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/SymbolGraph/Serialization.cpp @@ -0,0 +1,332 @@ +//===- SymbolGraph/Serialization.cpp ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the SymbolGraph serializer and parser. 
+/// +//===----------------------------------------------------------------------===// + +#include "clang/SymbolGraph/Serialization.h" +#include "clang/Basic/Version.h" +#include "clang/SymbolGraph/API.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/VersionTuple.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::symbolgraph; +using namespace llvm; +using namespace llvm::json; + +namespace { + +static void serializeObject(Object &Paren, StringRef Key, + Optional Obj) { + if (Obj) + Paren[Key] = std::move(Obj.getValue()); +} + +static void serializeArray(Object &Paren, StringRef Key, + Optional Array) { + if (Array) + Paren[Key] = std::move(Array.getValue()); +} + +// SymbolGraph: SemanticVersion +static Optional serializeSemanticVersion(const VersionTuple &V) { + if (V.empty()) + return None; + + Object Version; + Version["major"] = V.getMajor(); + Version["minor"] = V.getMinor().getValueOr(0); + Version["patch"] = V.getSubminor().getValueOr(0); + return Version; +} + +static Object serializeOperatingSystem(const Triple &T) { + Object OS; + OS["name"] = T.getOSTypeName(T.getOS()); + serializeObject(OS, "minimumVersion", + serializeSemanticVersion(T.getMinimumSupportedOSVersion())); + return OS; +} + +// SymbolGraph: Platform +static Object serializePlatform(const Triple &T) { + Object Platform; + Platform["architecture"] = T.getArchName(); + Platform["vendor"] = T.getVendorName(); + Platform["operatingSystem"] = serializeOperatingSystem(T); + return Platform; +} + +// SymbolGraph: SourcePosition +static Object serializeSourcePosition(const PresumedLoc &Loc, + bool IncludeFileURI = false) { + assert(Loc.isValid() && "invalid source position"); + + Object SourcePosition; + SourcePosition["line"] = Loc.getLine(); + SourcePosition["character"] = Loc.getColumn(); + + if (IncludeFileURI) { + std::string FileURI = "file://"; + FileURI += sys::path::convert_to_slash(Loc.getFilename()); + 
SourcePosition["uri"] = FileURI; + } + + return SourcePosition; +} + +// SymbolGraph: SourceRange +static Object serializeSourceRange(const PresumedLoc &BeginLoc, + const PresumedLoc &EndLoc) { + Object SourceRange; + serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc)); + serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc)); + return SourceRange; +} + +// SymbolGraph: AvailabilityItem +static Optional serializeAvailability(const AvailabilityInfo &Avail) { + if (Avail.isDefault()) + return None; + + Object Availbility; + serializeObject(Availbility, "introducedVersion", + serializeSemanticVersion(Avail.Introduced)); + serializeObject(Availbility, "deprecatedVersion", + serializeSemanticVersion(Avail.Deprecated)); + serializeObject(Availbility, "obsoletedVersion", + serializeSemanticVersion(Avail.Obsoleted)); + if (Avail.isUnavailable()) + Availbility["isUnconditionallyUnavailable"] = true; + if (Avail.isUnconditionallyDeprecated()) + Availbility["isUnconditionallyDeprecated"] = true; + + return Availbility; +} + +static StringRef getLanguageName(const LangOptions &LangOpts) { + auto Language = + LangStandard::getLangStandardForKind(LangOpts.LangStd).getLanguage(); + switch (Language) { + case Language::C: + return "c"; + case Language::ObjC: + return "objc"; + + // Unsupported language currently + case Language::CXX: + case Language::ObjCXX: + case Language::OpenCL: + case Language::OpenCLCXX: + case Language::CUDA: + case Language::RenderScript: + case Language::HIP: + + // Languages that the frontend cannot parse and compile + case Language::Unknown: + case Language::Asm: + case Language::LLVM_IR: + llvm_unreachable("Unsupported language kind"); + } + + llvm_unreachable("Unhandled language kind"); +} + +// SymbolGraph: Symbol::identifier +static Object serializeIdentifier(const APIRecord &Record, + const LangOptions &LangOpts) { + Object Identifier; + Identifier["precise"] = Record.USR; + Identifier["interfaceLanguage"] = 
getLanguageName(LangOpts); + + return Identifier; +} + +// SymbolGraph: DocComment +static Optional serializeDocComment(const DocComment &Comment) { + if (Comment.empty()) + return None; + + Object DocComment; + Array LinesArray; + for (const auto &CommentLine : Comment) { + Object Line; + Line["text"] = CommentLine.Text; + serializeObject(Line, "range", + serializeSourceRange(CommentLine.Begin, CommentLine.End)); + LinesArray.emplace_back(std::move(Line)); + } + serializeArray(DocComment, "lines", LinesArray); + + return DocComment; +} + +static Optional +serializeDeclarationFragments(const DeclarationFragments &DF) { + if (DF.getFragments().empty()) + return None; + + Array Fragments; + for (const auto &F : DF.getFragments()) { + Object Fragment; + Fragment["spelling"] = F.Spelling; + Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind); + if (!F.PreciseIdentifier.empty()) + Fragment["preciseIdentifier"] = F.PreciseIdentifier; + Fragments.emplace_back(std::move(Fragment)); + } + + return Fragments; +} + +static Optional +serializeFunctionSignature(const FunctionSignature &FS) { + if (FS.empty()) + return None; + + Object Signature; + serializeArray(Signature, "returns", + serializeDeclarationFragments(FS.getReturnType())); + + Array Parameters; + for (const auto &P : FS.getParameters()) { + Object Parameter; + Parameter["name"] = P.Name; + serializeArray(Parameter, "declarationFragments", + serializeDeclarationFragments(P.Fragments)); + Parameters.emplace_back(std::move(Parameter)); + } + + if (!Parameters.empty()) + Signature["parameters"] = std::move(Parameters); + + return Signature; +} + +static Object serializeNames(const APIRecord &Record) { + Object Names; + Names["title"] = Record.Name; + serializeArray(Names, "subHeading", + serializeDeclarationFragments(Record.SubHeading)); + + return Names; +} + +// SymbolGraph: Symbol::kind +static Object serializeSymbolKind(const APIRecord &Record, + const LangOptions &LangOpts) { + Object Kind; + 
switch (Record.getKind()) { + case APIRecord::RK_Global: + auto *GR = dyn_cast(&Record); + switch (GR->GlobalKind) { + case GVKind::Function: + Kind["identifier"] = (getLanguageName(LangOpts) + ".func").str(); + Kind["displayName"] = "Function"; + break; + case GVKind::Variable: + Kind["identifier"] = (getLanguageName(LangOpts) + ".var").str(); + Kind["displayName"] = "Global Variable"; + break; + case GVKind::Unknown: + // Unknown global kind + break; + } + break; + } + + return Kind; +} + +} // namespace + +const VersionTuple Serializer::FormatVersion{0, 5, 3}; + +Object Serializer::serializeMetadata() const { + Object Metadata; + serializeObject(Metadata, "formatVersion", + serializeSemanticVersion(FormatVersion)); + Metadata["generator"] = clang::getClangFullVersion(); + return Metadata; +} + +Object Serializer::serializeModule() const { + Object Module; + // FIXME: What to put in here? + Module["name"] = ""; + serializeObject(Module, "platform", serializePlatform(API.getTarget())); + return Module; +} + +bool Serializer::shouldSkip(const APIRecord &Record) const { + // Skip unconditionally unavailable symbols + if (Record.Availability.isUnconditionallyUnavailable()) + return true; + + return false; +} + +Optional Serializer::serializeAPIRecord(const APIRecord &Record) const { + if (shouldSkip(Record)) + return None; + + Object Obj; + serializeObject(Obj, "identifier", + serializeIdentifier(Record, API.getLangOpts())); + serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts())); + serializeObject(Obj, "names", serializeNames(Record)); + serializeObject( + Obj, "location", + serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true)); + serializeObject(Obj, "availbility", + serializeAvailability(Record.Availability)); + serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); + serializeArray(Obj, "declarationFragments", + serializeDeclarationFragments(Record.Declaration)); + + return Obj; +} + +void 
Serializer::serializeGlobalRecord(const GlobalRecord &Record) { + auto Obj = serializeAPIRecord(Record); + if (!Obj) + return; + + if (Record.GlobalKind == GVKind::Function) + serializeObject(*Obj, "parameters", + serializeFunctionSignature(Record.Signature)); + + Symbols.emplace_back(std::move(*Obj)); +} + +Object Serializer::serialize() { + Object Root; + serializeObject(Root, "metadata", serializeMetadata()); + serializeObject(Root, "module", serializeModule()); + + for (const auto &Global : API.getGlobals()) + serializeGlobalRecord(*Global.second); + + Root["symbols"] = std::move(Symbols); + Root["relationhips"] = std::move(Relationships); + + return Root; +} + +void Serializer::serialize(raw_ostream &os) { + Object root = serialize(); + if (Options.Compact) + os << formatv("{0}", Value(std::move(root))) << "\n"; + else + os << formatv("{0:2}", Value(std::move(root))) << "\n"; +} diff --git a/clang/test/Driver/extract-api.c b/clang/test/Driver/extract-api.c --- a/clang/test/Driver/extract-api.c +++ b/clang/test/Driver/extract-api.c @@ -8,9 +8,3 @@ // EXTRACT-API-PHASES: 2: compiler, {1}, api-information // EXTRACT-API-PHASES-NOT: 3: // EXTRACT-API-PHASES: END - -// FIXME: Check for the dummy output now to verify that the custom action was executed. 
-// RUN: %clang -extract-api %s | FileCheck -check-prefix DUMMY-OUTPUT %s - -void dummy_function(void); -// DUMMY-OUTPUT: dummy_function diff --git a/clang/test/SymbolGraph/global_record.c b/clang/test/SymbolGraph/global_record.c new file mode 100644 --- /dev/null +++ b/clang/test/SymbolGraph/global_record.c @@ -0,0 +1,367 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: sed -e "s@INPUT_DIR@%/t@g" %t/reference.output.json.in >> \ +// RUN: %t/reference.output.json +// RUN: %clang -extract-api -target arm64-apple-macosx \ +// RUN: %t/input.c -o %t/output.json | FileCheck -allow-empty %s +// RUN: sed -e "s@\"generator\": \"clang.*\"@\"generator\": \"clang\"@g" \ +// RUN: %t/output.json >> %t/output-normalized.json +// RUN: diff %t/reference.output.json %t/output-normalized.json + +// CHECK-NOT: error: +// CHECK-NOT: warning: + +//--- input.c +int num; + +/** + * \brief Add two numbers. + * \param [in] x A number. + * \param [in] y Another number. + * \param [out] res The result of x + y. 
+ */ +void add(const int x, const int y, int *res); + +char unavailable __attribute__((unavailable)); + +//--- reference.output.json.in +{ + "metadata": { + "formatVersion": { + "major": 0, + "minor": 5, + "patch": 3 + }, + "generator": "clang" + }, + "module": { + "name": "", + "platform": { + "architecture": "arm64", + "operatingSystem": { + "minimumVersion": { + "major": 11, + "minor": 0, + "patch": 0 + }, + "name": "macosx" + }, + "vendor": "apple" + } + }, + "relationhips": [], + "symbols": [ + { + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "num" + } + ], + "identifier": { + "interfaceLanguage": "c", + "precise": "c:@num" + }, + "kind": { + "displayName": "Global Variable", + "identifier": "c.var" + }, + "location": { + "character": 5, + "line": 1, + "uri": "file://INPUT_DIR/input.c" + }, + "names": { + "subHeading": [ + { + "kind": "identifier", + "spelling": "num" + } + ], + "title": "num" + } + }, + { + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:v", + "spelling": "void" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "add" + }, + { + "kind": "text", + "spelling": "(" + }, + { + "kind": "keyword", + "spelling": "const" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "internalParam", + "spelling": "x" + }, + { + "kind": "text", + "spelling": ", " + }, + { + "kind": "keyword", + "spelling": "const" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "internalParam", + "spelling": "y" + }, + { + "kind": "text", + "spelling": ", " + }, + 
{ + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " *" + }, + { + "kind": "internalParam", + "spelling": "res" + }, + { + "kind": "text", + "spelling": ")" + } + ], + "docComment": { + "lines": [ + { + "range": { + "end": { + "character": 4, + "line": 3 + }, + "start": { + "character": 4, + "line": 3 + } + }, + "text": "" + }, + { + "range": { + "end": { + "character": 27, + "line": 4 + }, + "start": { + "character": 3, + "line": 4 + } + }, + "text": " \\brief Add two numbers." + }, + { + "range": { + "end": { + "character": 30, + "line": 5 + }, + "start": { + "character": 3, + "line": 5 + } + }, + "text": " \\param [in] x A number." + }, + { + "range": { + "end": { + "character": 36, + "line": 6 + }, + "start": { + "character": 3, + "line": 6 + } + }, + "text": " \\param [in] y Another number." + }, + { + "range": { + "end": { + "character": 41, + "line": 7 + }, + "start": { + "character": 3, + "line": 7 + } + }, + "text": " \\param [out] res The result of x + y." 
+ }, + { + "range": { + "end": { + "character": 4, + "line": 8 + }, + "start": { + "character": 1, + "line": 8 + } + }, + "text": " " + } + ] + }, + "identifier": { + "interfaceLanguage": "c", + "precise": "c:@F@add" + }, + "kind": { + "displayName": "Function", + "identifier": "c.func" + }, + "location": { + "character": 6, + "line": 9, + "uri": "file://INPUT_DIR/input.c" + }, + "names": { + "subHeading": [ + { + "kind": "identifier", + "spelling": "add" + } + ], + "title": "add" + }, + "parameters": { + "parameters": [ + { + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "const" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "internalParam", + "spelling": "x" + } + ], + "name": "x" + }, + { + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "const" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "internalParam", + "spelling": "y" + } + ], + "name": "y" + }, + { + "declarationFragments": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:I", + "spelling": "int" + }, + { + "kind": "text", + "spelling": " *" + }, + { + "kind": "internalParam", + "spelling": "res" + } + ], + "name": "res" + } + ], + "returns": [ + { + "kind": "typeIdentifier", + "preciseIdentifier": "c:v", + "spelling": "void" + } + ] + } + } + ] +}