Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -7,6 +7,7 @@ endif() add_subdirectory(change-namespace) +add_subdirectory(clang-doc) add_subdirectory(clang-query) add_subdirectory(clang-move) add_subdirectory(clangd) Index: clang-doc/BitcodeWriter.h =================================================================== --- /dev/null +++ clang-doc/BitcodeWriter.h @@ -0,0 +1,244 @@ +//===-- BitcodeWriter.h - ClangDoc Bitcode Writer --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a writer for serializing the clang-doc internal +// representation to LLVM bitcode. The writer takes in a stream and emits the +// generated bitcode to that stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_BITCODE_WRITER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_BITCODE_WRITER_H + +#include +#include +#include "Representation.h" +#include "clang/AST/AST.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Bitcode/BitstreamWriter.h" + +namespace clang { +namespace doc { + +// Current version number of clang-doc bitcode. +// Should be bumped when removing or changing BlockIds, RecordIds, or +// BitCodeConstants, though they can be added without breaking it. 
+static const unsigned VERSION_NUMBER = 1; + +struct BitCodeConstants { + static constexpr unsigned SignatureBitSize = 8U; + static constexpr unsigned SubblockIDSize = 4U; + static constexpr unsigned IntSize = 16U; + static constexpr unsigned StringLengthSize = 16U; + static constexpr unsigned LineNumberSize = 16U; +}; + +// New Ids need to be added to both the enum here and the relevant IdNameMap in +// the implementation file. +enum BlockId { + BI_VERSION_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID, + BI_NAMESPACE_BLOCK_ID, + BI_ENUM_BLOCK_ID, + BI_TYPE_BLOCK_ID, + BI_FIELD_TYPE_BLOCK_ID, + BI_MEMBER_TYPE_BLOCK_ID, + BI_RECORD_BLOCK_ID, + BI_FUNCTION_BLOCK_ID, + BI_COMMENT_BLOCK_ID, + BI_FIRST = BI_VERSION_BLOCK_ID, + BI_LAST = BI_COMMENT_BLOCK_ID +}; + +// New Ids need to be added to the enum here, the relevant IdNameMap in +// the implementation file, and the initialization list in emitBlockInfoBlock(). +#define INFORECORDS(X) X##_USR, X##_NAME, X##_NAMESPACE + +enum RecordId { + VERSION = 1, + COMMENT_KIND, + COMMENT_TEXT, + COMMENT_NAME, + COMMENT_POSITION, + COMMENT_DIRECTION, + COMMENT_PARAMNAME, + COMMENT_CLOSENAME, + COMMENT_SELFCLOSING, + COMMENT_EXPLICIT, + COMMENT_ATTRKEY, + COMMENT_ATTRVAL, + COMMENT_ARG, + TYPE_TYPE, + FIELD_TYPE_TYPE, + FIELD_TYPE_NAME, + MEMBER_TYPE_TYPE, + MEMBER_TYPE_NAME, + MEMBER_TYPE_ACCESS, + INFORECORDS(NAMESPACE), + INFORECORDS(ENUM), + ENUM_ISDEFINITION, + ENUM_DEFLOCATION, + ENUM_LOCATION, + ENUM_SCOPED, + INFORECORDS(RECORD), + RECORD_ISDEFINITION, + RECORD_DEFLOCATION, + RECORD_LOCATION, + RECORD_TAG_TYPE, + RECORD_PARENT, + RECORD_VPARENT, + INFORECORDS(FUNCTION), + FUNCTION_ISDEFINITION, + FUNCTION_DEFLOCATION, + FUNCTION_LOCATION, + FUNCTION_PARENT, + FUNCTION_ACCESS, + RI_FIRST = VERSION, + RI_LAST = FUNCTION_ACCESS +}; + +static constexpr unsigned BlockIdCount = BI_LAST - BI_FIRST + 1; +static constexpr unsigned RecordIdCount = RI_LAST - RI_FIRST + 1; + +#undef INFORECORDS + +template +struct 
MapFromInfoToBlockId { + static const BlockId ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_NAMESPACE_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_ENUM_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_RECORD_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_FUNCTION_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_TYPE_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_FIELD_TYPE_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_MEMBER_TYPE_BLOCK_ID; +}; + +template <> +struct MapFromInfoToBlockId { + static const BlockId ID = BI_COMMENT_BLOCK_ID; +}; + +class ClangDocBitcodeWriter { + public: + ClangDocBitcodeWriter(llvm::BitstreamWriter &Stream, + bool OmitFilenames = false) + : Stream(Stream), OmitFilenames(OmitFilenames) {} + + using RecordData = SmallVector; + + template + void writeBitstream(const T &I, bool WriteBlockInfo = false); + + private: + class AbbreviationMap { + llvm::DenseMap Abbrevs; + + public: + AbbreviationMap() : Abbrevs(RecordIdCount) {} + void add(RecordId RID, unsigned AbbrevID); + unsigned get(RecordId RID) const; + }; + + // Scope guard: enters sub-block ID on construction and exits the block on + // destruction. + class StreamSubBlockGuard { + llvm::BitstreamWriter &Stream; + + public: + StreamSubBlockGuard(llvm::BitstreamWriter &Stream_, BlockId ID) + : Stream(Stream_) { + Stream.EnterSubblock(ID, BitCodeConstants::SubblockIDSize); + } + + // The guard holds a reference to the stream, so it can never be + // default-constructed; state that explicitly rather than '= default'. + StreamSubBlockGuard() = delete; + StreamSubBlockGuard(const StreamSubBlockGuard &) = delete; + StreamSubBlockGuard &operator=(const StreamSubBlockGuard &) = delete; + + ~StreamSubBlockGuard() { Stream.ExitBlock(); } + }; + + // Block emission of different info types + void emitBlock(const NamespaceInfo &I); + void emitBlock(const RecordInfo &I); + void emitBlock(const FunctionInfo &I); + void 
emitBlock(const EnumInfo &I); + void emitBlock(const TypeInfo &B); + void emitBlock(const FieldTypeInfo &B); + void emitBlock(const MemberTypeInfo &B); + void emitBlock(const CommentInfo &B); + + // Emission of validation and overview blocks + void emitHeader(); + void emitVersion(); + void emitRecordID(RecordId ID); + void emitBlockID(BlockId ID); + void emitBlockInfoBlock(); + void emitBlockInfo(BlockId BID, const std::initializer_list &RIDs); + + // Emission of individual record types + void emitRecord(StringRef Str, RecordId ID); + void emitRecord(const Location &Loc, RecordId ID); + void emitRecord(int Value, RecordId ID); + bool prepRecordData(RecordId ID, bool ShouldEmit = true); + + // Emission of different abbreviation types + void emitAbbrev(RecordId ID, BlockId Block); + + RecordData Record; + llvm::BitstreamWriter &Stream; + bool OmitFilenames; + AbbreviationMap Abbrevs; +}; + +/// \brief Entry point for writing an individual info to bitcode. +/// +/// \param I The info to emit to bitcode. +/// +/// \param WriteBlockInfo +/// For serializing a single info (as in the mapper +/// phase), this should be set to true (so that the appropriate abbreviations +/// and headers are written). Defaults to false, emitting only the block (and +/// not the header/blockinfo/version) to the stream. 
+template +void ClangDocBitcodeWriter::writeBitstream(const T &I, bool WriteBlockInfo) { + if (WriteBlockInfo) emitBlockInfoBlock(); + StreamSubBlockGuard Block(Stream, MapFromInfoToBlockId::ID); + emitBlock(I); +} + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_BITCODE_WRITER_H Index: clang-doc/BitcodeWriter.cpp =================================================================== --- /dev/null +++ clang-doc/BitcodeWriter.cpp @@ -0,0 +1,390 @@ +//===-- BitcodeWriter.cpp - ClangDoc Bitcode Writer ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BitcodeWriter.h" +#include "llvm/ADT/IndexedMap.h" + +namespace clang { +namespace doc { + +// Since id enums are not zero-indexed, we need to transform the given id into +// its associated index. +struct BlockIdToIndexFunctor { + using argument_type = unsigned; + unsigned operator()(unsigned ID) const { return ID - BI_FIRST; } +}; + +struct RecordIdToIndexFunctor { + using argument_type = unsigned; + unsigned operator()(unsigned ID) const { return ID - RI_FIRST; } +}; + +using AbbrevDsc = void (*)(std::shared_ptr &Abbrev); + +static void IntAbbrev(std::shared_ptr &Abbrev) { + Abbrev->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::IntSize)); // Integer +} + +static void StringAbbrev(std::shared_ptr &Abbrev) { + Abbrev->Add(llvm::BitCodeAbbrevOp( + llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::StringLengthSize)); // String size + Abbrev->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); // String +} + +// Assumes that the file will not have more than 65535 lines. 
+static void LocationAbbrev(std::shared_ptr &Abbrev) { + Abbrev->Add( + llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::LineNumberSize)); // Line number + Abbrev->Add(llvm::BitCodeAbbrevOp( + llvm::BitCodeAbbrevOp::Fixed, + BitCodeConstants::StringLengthSize)); // Filename size + Abbrev->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); // Filename +} + +struct RecordIdDsc { + llvm::StringRef Name; + AbbrevDsc Abbrev = nullptr; + + RecordIdDsc() = default; + RecordIdDsc(llvm::StringRef Name, AbbrevDsc Abbrev) + : Name(Name), Abbrev(Abbrev) {} + + // Is this 'description' valid? + operator bool() const { + return Abbrev != nullptr && Name.data() != nullptr && !Name.empty(); + } +}; + +static const llvm::IndexedMap + BlockIdNameMap = []() { + llvm::IndexedMap BlockIdNameMap; + BlockIdNameMap.resize(BlockIdCount); + + // There is no init-list constructor for the IndexedMap, so have to + // improvise + static constexpr std::initializer_list< + std::pair> + Inits = {{BI_VERSION_BLOCK_ID, "VersionBlock"}, + {BI_NAMESPACE_BLOCK_ID, "NamespaceBlock"}, + {BI_ENUM_BLOCK_ID, "EnumBlock"}, + {BI_TYPE_BLOCK_ID, "TypeBlock"}, + {BI_FIELD_TYPE_BLOCK_ID, "FieldTypeBlock"}, + {BI_MEMBER_TYPE_BLOCK_ID, "MemberTypeBlock"}, + {BI_RECORD_BLOCK_ID, "RecordBlock"}, + {BI_FUNCTION_BLOCK_ID, "FunctionBlock"}, + {BI_COMMENT_BLOCK_ID, "CommentBlock"}}; + static_assert(Inits.size() == BlockIdCount, + "unexpected count of initializers"); + for (const auto &Init : Inits) BlockIdNameMap[Init.first] = Init.second; + assert(BlockIdNameMap.size() == BlockIdCount); + return BlockIdNameMap; + }(); + +static const llvm::IndexedMap + RecordIdNameMap = []() { + llvm::IndexedMap RecordIdNameMap; + RecordIdNameMap.resize(RecordIdCount); + + // There is no init-list constructor for the IndexedMap, so have to + // improvise + static std::initializer_list> Inits = { + {VERSION, {"Version", &IntAbbrev}}, + {COMMENT_KIND, {"Kind", &StringAbbrev}}, + {COMMENT_TEXT, {"Text", 
&StringAbbrev}}, + {COMMENT_NAME, {"Name", &StringAbbrev}}, + {COMMENT_POSITION, {"Position", &StringAbbrev}}, + {COMMENT_DIRECTION, {"Direction", &StringAbbrev}}, + {COMMENT_PARAMNAME, {"ParamName", &StringAbbrev}}, + {COMMENT_CLOSENAME, {"CloseName", &StringAbbrev}}, + {COMMENT_SELFCLOSING, {"SelfClosing", &IntAbbrev}}, + {COMMENT_EXPLICIT, {"Explicit", &IntAbbrev}}, + {COMMENT_ATTRKEY, {"AttrKey", &StringAbbrev}}, + {COMMENT_ATTRVAL, {"AttrVal", &StringAbbrev}}, + {COMMENT_ARG, {"Arg", &StringAbbrev}}, + {TYPE_TYPE, {"Type", &StringAbbrev}}, + {FIELD_TYPE_TYPE, {"Type", &StringAbbrev}}, + {FIELD_TYPE_NAME, {"Name", &StringAbbrev}}, + {MEMBER_TYPE_TYPE, {"Type", &StringAbbrev}}, + {MEMBER_TYPE_NAME, {"Name", &StringAbbrev}}, + {MEMBER_TYPE_ACCESS, {"Access", &IntAbbrev}}, + {NAMESPACE_USR, {"USR", &StringAbbrev}}, + {NAMESPACE_NAME, {"Name", &StringAbbrev}}, + {NAMESPACE_NAMESPACE, {"Namespace", &StringAbbrev}}, + {ENUM_USR, {"USR", &StringAbbrev}}, + {ENUM_NAME, {"Name", &StringAbbrev}}, + {ENUM_NAMESPACE, {"Namespace", &StringAbbrev}}, + {ENUM_ISDEFINITION, {"IsDefinition", &IntAbbrev}}, + {ENUM_DEFLOCATION, {"DefLocation", &LocationAbbrev}}, + {ENUM_LOCATION, {"Location", &LocationAbbrev}}, + {ENUM_SCOPED, {"Scoped", &IntAbbrev}}, + {RECORD_USR, {"USR", &StringAbbrev}}, + {RECORD_NAME, {"Name", &StringAbbrev}}, + {RECORD_NAMESPACE, {"Namespace", &StringAbbrev}}, + {RECORD_ISDEFINITION, {"IsDefinition", &IntAbbrev}}, + {RECORD_DEFLOCATION, {"DefLocation", &LocationAbbrev}}, + {RECORD_LOCATION, {"Location", &LocationAbbrev}}, + {RECORD_TAG_TYPE, {"TagType", &IntAbbrev}}, + {RECORD_PARENT, {"Parent", &StringAbbrev}}, + {RECORD_VPARENT, {"VParent", &StringAbbrev}}, + {FUNCTION_USR, {"USR", &StringAbbrev}}, + {FUNCTION_NAME, {"Name", &StringAbbrev}}, + {FUNCTION_NAMESPACE, {"Namespace", &StringAbbrev}}, + {FUNCTION_ISDEFINITION, {"IsDefinition", &IntAbbrev}}, + {FUNCTION_DEFLOCATION, {"DefLocation", &LocationAbbrev}}, + {FUNCTION_LOCATION, {"Location", 
&LocationAbbrev}}, + {FUNCTION_PARENT, {"Parent", &StringAbbrev}}, + {FUNCTION_ACCESS, {"Access", &IntAbbrev}}}; + assert(Inits.size() == RecordIdCount); + for (const auto &Init : Inits) RecordIdNameMap[Init.first] = Init.second; + assert(RecordIdNameMap.size() == RecordIdCount); + return RecordIdNameMap; + }(); + +// AbbreviationMap + +void ClangDocBitcodeWriter::AbbreviationMap::add(RecordId RID, + unsigned AbbrevID) { + assert(RecordIdNameMap[RID] && "Unknown Abbreviation"); + assert(Abbrevs.find(RID) == Abbrevs.end() && "Abbreviation already added."); + Abbrevs[RID] = AbbrevID; +} + +unsigned ClangDocBitcodeWriter::AbbreviationMap::get(RecordId RID) const { + assert(RecordIdNameMap[RID] && "Unknown Abbreviation"); + assert(Abbrevs.find(RID) != Abbrevs.end() && "Unknown abbreviation."); + return Abbrevs.lookup(RID); +} + +// Validation and Overview Blocks + +/// \brief Emits the magic number header to check that its the right format, +/// in this case, 'DOCS'. +void ClangDocBitcodeWriter::emitHeader() { + for (char C : llvm::StringRef("DOCS")) + Stream.Emit((unsigned)C, BitCodeConstants::SignatureBitSize); +} + +void ClangDocBitcodeWriter::emitVersion() { + StreamSubBlockGuard Block(Stream, BI_VERSION_BLOCK_ID); + emitRecord(VERSION_NUMBER, VERSION); +} + +/// \brief Emits a block ID and the block name to the BLOCKINFO block. +void ClangDocBitcodeWriter::emitBlockID(BlockId ID) { + const auto &BlockIdName = BlockIdNameMap[ID]; + assert(BlockIdName.data() && BlockIdName.size() && "Unknown BlockId!"); + + Record.clear(); + Record.push_back(ID); + Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record); + + Record.clear(); + for (const char C : BlockIdNameMap[ID]) Record.push_back(C); + Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record); +} + +/// \brief Emits a record name to the BLOCKINFO block. 
+void ClangDocBitcodeWriter::emitRecordID(RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown Abbreviation"); + prepRecordData(ID); + for (const char C : RecordIdNameMap[ID].Name) Record.push_back(C); + Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record); +} + +// Abbreviations + +void ClangDocBitcodeWriter::emitAbbrev(RecordId ID, BlockId Block) { + assert(RecordIdNameMap[ID] && "Unknown Abbreviation"); + auto Abbrev = std::make_shared(); + Abbrev->Add(llvm::BitCodeAbbrevOp(ID)); + RecordIdNameMap[ID].Abbrev(Abbrev); + Abbrevs.add(ID, Stream.EmitBlockInfoAbbrev(Block, std::move(Abbrev))); +} + +// Records + +void ClangDocBitcodeWriter::emitRecord(llvm::StringRef Str, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown Abbreviation"); + assert(RecordIdNameMap[ID].Abbrev == &StringAbbrev && "Abbrev type mismatch"); + if (!prepRecordData(ID, !Str.empty())) return; + assert(Str.size() < (1U << BitCodeConstants::StringLengthSize)); + Record.push_back(Str.size()); + Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, Str); +} + +void ClangDocBitcodeWriter::emitRecord(const Location &Loc, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown Abbreviation"); + assert(RecordIdNameMap[ID].Abbrev == &LocationAbbrev && + "Abbrev type mismatch"); + if (!prepRecordData(ID, !OmitFilenames)) return; + assert(Loc.LineNumber < (1U << BitCodeConstants::LineNumberSize)); + Record.push_back(Loc.LineNumber); + assert(Loc.Filename.size() < (1U << BitCodeConstants::StringLengthSize)); + Record.push_back(Loc.Filename.size()); + Stream.EmitRecordWithBlob(Abbrevs.get(ID), Record, Loc.Filename); +} + +void ClangDocBitcodeWriter::emitRecord(int Val, RecordId ID) { + assert(RecordIdNameMap[ID] && "Unknown Abbreviation"); + assert(RecordIdNameMap[ID].Abbrev == &IntAbbrev && "Abbrev type mismatch"); + if (!prepRecordData(ID, Val)) return; + assert(Val < (1U << BitCodeConstants::IntSize)); + Record.push_back(Val); + Stream.EmitRecordWithAbbrev(Abbrevs.get(ID), Record); +} 
+ +bool ClangDocBitcodeWriter::prepRecordData(RecordId ID, bool ShouldEmit) { + assert(RecordIdNameMap[ID] && "Unknown Abbreviation"); + if (!ShouldEmit) return false; + Record.clear(); + Record.push_back(ID); + return true; +} + +// BlockInfo Block + +void ClangDocBitcodeWriter::emitBlockInfoBlock() { + emitHeader(); + Stream.EnterBlockInfoBlock(); + + static const std::initializer_list< + std::pair>> + TheBlocks{// Version Block + {BI_VERSION_BLOCK_ID, {VERSION}}, + // Comment Block + {BI_COMMENT_BLOCK_ID, + {COMMENT_KIND, COMMENT_TEXT, COMMENT_NAME, COMMENT_DIRECTION, + COMMENT_PARAMNAME, COMMENT_CLOSENAME, COMMENT_SELFCLOSING, + COMMENT_EXPLICIT, COMMENT_ATTRKEY, COMMENT_ATTRVAL, + COMMENT_ARG, COMMENT_POSITION}}, + // Type Block + {BI_TYPE_BLOCK_ID, {TYPE_TYPE}}, + // FieldType Block + {BI_FIELD_TYPE_BLOCK_ID, {FIELD_TYPE_TYPE, FIELD_TYPE_NAME}}, + // MemberType Block + {BI_MEMBER_TYPE_BLOCK_ID, + {MEMBER_TYPE_TYPE, MEMBER_TYPE_NAME, MEMBER_TYPE_ACCESS}}, + // Enum Block + {BI_ENUM_BLOCK_ID, + {ENUM_USR, ENUM_NAME, ENUM_NAMESPACE, ENUM_ISDEFINITION, + ENUM_DEFLOCATION, ENUM_LOCATION, ENUM_SCOPED}}, + // Namespace Block + {BI_NAMESPACE_BLOCK_ID, + {NAMESPACE_USR, NAMESPACE_NAME, NAMESPACE_NAMESPACE}}, + // Record Block + {BI_RECORD_BLOCK_ID, + {RECORD_USR, RECORD_NAME, RECORD_NAMESPACE, + RECORD_ISDEFINITION, RECORD_DEFLOCATION, RECORD_LOCATION, + RECORD_TAG_TYPE, RECORD_PARENT, RECORD_VPARENT}}, + // Function Block + {BI_FUNCTION_BLOCK_ID, + {FUNCTION_USR, FUNCTION_NAME, FUNCTION_NAMESPACE, + FUNCTION_ISDEFINITION, FUNCTION_DEFLOCATION, + FUNCTION_LOCATION, FUNCTION_PARENT, FUNCTION_ACCESS}}}; + + for (const auto &Block : TheBlocks) emitBlockInfo(Block.first, Block.second); + + Stream.ExitBlock(); + emitVersion(); +} + +void ClangDocBitcodeWriter::emitBlockInfo( + BlockId BID, const std::initializer_list &RIDs) { + emitBlockID(BID); + for (RecordId RID : RIDs) { + emitRecordID(RID); + emitAbbrev(RID, BID); + } +} + +// Block emission + +void 
ClangDocBitcodeWriter::emitBlock(const TypeInfo &T) { + emitRecord(T.Type.USR, TYPE_TYPE); + for (const auto &CI : T.Description) writeBitstream(CI); +} + +void ClangDocBitcodeWriter::emitBlock(const FieldTypeInfo &T) { + emitRecord(T.Type.USR, FIELD_TYPE_TYPE); + emitRecord(T.Name, FIELD_TYPE_NAME); + for (const auto &CI : T.Description) writeBitstream(CI); +} + +void ClangDocBitcodeWriter::emitBlock(const MemberTypeInfo &T) { + emitRecord(T.Type.USR, MEMBER_TYPE_TYPE); + emitRecord(T.Name, MEMBER_TYPE_NAME); + emitRecord(T.Access, MEMBER_TYPE_ACCESS); + for (const auto &CI : T.Description) writeBitstream(CI); +} + +void ClangDocBitcodeWriter::emitBlock(const CommentInfo &I) { + for (const auto &L : + std::initializer_list>{ + {I.Text, COMMENT_TEXT}, + {I.Name, COMMENT_NAME}, + {I.Direction, COMMENT_DIRECTION}, + {I.ParamName, COMMENT_PARAMNAME}, + {I.CloseName, COMMENT_CLOSENAME}}) + emitRecord(L.first, L.second); + emitRecord(I.SelfClosing, COMMENT_SELFCLOSING); + emitRecord(I.Explicit, COMMENT_EXPLICIT); + for (const auto &A : I.AttrKeys) emitRecord(A, COMMENT_ATTRKEY); + for (const auto &A : I.AttrValues) emitRecord(A, COMMENT_ATTRVAL); + for (const auto &A : I.Args) emitRecord(A, COMMENT_ARG); + for (const auto &P : I.Position) emitRecord(P, COMMENT_POSITION); + for (const auto &C : I.Children) writeBitstream(*C); +} + +#define EMITINFO(X) \ + emitRecord(I.USR, X##_USR); \ + emitRecord(I.Name, X##_NAME); \ + for (const auto &N : I.Namespace) emitRecord(N.USR, X##_NAMESPACE); \ + for (const auto &CI : I.Description) writeBitstream(CI); + +void ClangDocBitcodeWriter::emitBlock(const NamespaceInfo &I) { + EMITINFO(NAMESPACE) +} + +void ClangDocBitcodeWriter::emitBlock(const EnumInfo &I) { + EMITINFO(ENUM) + emitRecord(I.IsDefinition, ENUM_ISDEFINITION); + if (I.IsDefinition) emitRecord(I.DefLoc, ENUM_DEFLOCATION); + for (const auto &L : I.Loc) emitRecord(L, ENUM_LOCATION); + emitRecord(I.Scoped, ENUM_SCOPED); + for (const auto &N : I.Members) writeBitstream(N); 
+} + +void ClangDocBitcodeWriter::emitBlock(const RecordInfo &I) { + EMITINFO(RECORD) + emitRecord(I.IsDefinition, RECORD_ISDEFINITION); + if (I.IsDefinition) emitRecord(I.DefLoc, RECORD_DEFLOCATION); + for (const auto &L : I.Loc) emitRecord(L, RECORD_LOCATION); + emitRecord(I.TagType, RECORD_TAG_TYPE); + for (const auto &N : I.Members) writeBitstream(N); + for (const auto &P : I.Parents) emitRecord(P.USR, RECORD_PARENT); + for (const auto &P : I.VirtualParents) emitRecord(P.USR, RECORD_VPARENT); +} + +void ClangDocBitcodeWriter::emitBlock(const FunctionInfo &I) { + EMITINFO(FUNCTION) + emitRecord(I.IsDefinition, FUNCTION_ISDEFINITION); + if (I.IsDefinition) emitRecord(I.DefLoc, FUNCTION_DEFLOCATION); + for (const auto &L : I.Loc) emitRecord(L, FUNCTION_LOCATION); + emitRecord(I.Parent.USR, FUNCTION_PARENT); + // The mapper fills in Access and FUNCTION_ACCESS has an abbreviation in this + // block, so serialize it with the other function records. + emitRecord(I.Access, FUNCTION_ACCESS); + writeBitstream(I.ReturnType); + for (const auto &N : I.Params) writeBitstream(N); +} + +#undef EMITINFO + +} // namespace doc +} // namespace clang Index: clang-doc/CMakeLists.txt =================================================================== --- /dev/null +++ clang-doc/CMakeLists.txt @@ -0,0 +1,21 @@ +set(LLVM_LINK_COMPONENTS + support + ) + +add_clang_library(clangDoc + BitcodeWriter.cpp + Mapper.cpp + + LINK_LIBS + clangAnalysis + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangIndex + clangLex + clangTooling + clangToolingCore + ) + +add_subdirectory(tool) Index: clang-doc/ClangDoc.h =================================================================== --- /dev/null +++ clang-doc/ClangDoc.h @@ -0,0 +1,81 @@ +//===-- ClangDoc.h - ClangDoc -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the main entry point for the clang-doc tool. 
It runs +// the clang-doc mapper on a given set of source code files using a +// FrontendActionFactory. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H + +#include +#include +#include "BitcodeWriter.h" +#include "Mapper.h" +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Comment.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace doc { + +class MapperActionFactory : public tooling::FrontendActionFactory { + public: + MapperActionFactory(tooling::ExecutionContext *ECtx, bool OmitFilenames) + : ECtx(ECtx), OmitFilenames(OmitFilenames) {} + + clang::FrontendAction *create() override { + class ClangDocConsumer : public clang::ASTConsumer { + public: + ClangDocConsumer(ASTContext *Ctx, ExecutionContext *ECtx, + bool OmitFilenames) + : Mapper(Ctx, ECtx, OmitFilenames){}; + void HandleTranslationUnit(clang::ASTContext &Context) override { + Mapper.TraverseDecl(Context.getTranslationUnitDecl()); + } + + private: + ClangDocMapper Mapper; + }; + + class ClangDocAction : public clang::ASTFrontendAction { + public: + ClangDocAction(ExecutionContext *ECtx, bool OmitFilenames) + : ECtx(ECtx), OmitFilenames(OmitFilenames) {} + + std::unique_ptr CreateASTConsumer( + clang::CompilerInstance &Compiler, llvm::StringRef InFile) override { + return llvm::make_unique(&Compiler.getASTContext(), + ECtx, OmitFilenames); + } + + private: + ExecutionContext *ECtx; + bool OmitFilenames; + }; + return new ClangDocAction(ECtx, OmitFilenames); + } + + tooling::ExecutionContext *ECtx; + bool OmitFilenames; +}; + +} // namespace doc +} // namespace clang 
+ +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANGDOC_H Index: clang-doc/Mapper.h =================================================================== --- /dev/null +++ clang-doc/Mapper.h @@ -0,0 +1,140 @@ +//===-- Mapper.h - ClangDoc Mapper ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Mapper piece of the clang-doc tool. It implements +// a RecursiveASTVisitor to look at each declaration and populate the info +// into the internal representation. Each seen declaration is serialized +// to bitcode and written out to the ExecutionContext as a KV pair where the +// key is the declaration's USR and the value is the serialized bitcode. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_MAPPER_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_MAPPER_H + +#include +#include +#include +#include "Representation.h" +#include "clang/AST/AST.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/CommentVisitor.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Frontend/ASTConsumers.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/Execution.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang::comments; +using namespace clang::tooling; + +namespace clang { +namespace doc { + +class ClangDocMapper : public clang::RecursiveASTVisitor { + public: + explicit ClangDocMapper(ASTContext *Ctx, ExecutionContext *ECtx, + bool OmitFilenames) + : ECtx(ECtx), Serializer(OmitFilenames) {} + + bool VisitNamespaceDecl(const NamespaceDecl *D); + bool 
VisitRecordDecl(const RecordDecl *D); + bool VisitEnumDecl(const EnumDecl *D); + bool VisitCXXMethodDecl(const CXXMethodDecl *D); + bool VisitFunctionDecl(const FunctionDecl *D); + + private: + class ClangDocCommentVisitor + : public ConstCommentVisitor { + public: + ClangDocCommentVisitor(CommentInfo &CI) : CurrentCI(CI) {} + + void parseComment(const comments::Comment *C); + + void visitTextComment(const TextComment *C); + void visitInlineCommandComment(const InlineCommandComment *C); + void visitHTMLStartTagComment(const HTMLStartTagComment *C); + void visitHTMLEndTagComment(const HTMLEndTagComment *C); + void visitBlockCommandComment(const BlockCommandComment *C); + void visitParamCommandComment(const ParamCommandComment *C); + void visitTParamCommandComment(const TParamCommandComment *C); + void visitVerbatimBlockComment(const VerbatimBlockComment *C); + void visitVerbatimBlockLineComment(const VerbatimBlockLineComment *C); + void visitVerbatimLineComment(const VerbatimLineComment *C); + + private: + std::string getCommandName(unsigned CommandID) const; + bool isWhitespaceOnly(StringRef S) const; + + CommentInfo &CurrentCI; + }; + + class ClangDocSerializer { + public: + ClangDocSerializer(bool OmitFilenames) : OmitFilenames(OmitFilenames) {} + + std::string emitInfo(const NamespaceDecl *D, const FullComment *FC, + int LineNumber, StringRef File); + std::string emitInfo(const RecordDecl *D, const FullComment *FC, + int LineNumber, StringRef File); + std::string emitInfo(const EnumDecl *D, const FullComment *FC, + int LineNumber, StringRef File); + std::string emitInfo(const FunctionDecl *D, const FullComment *FC, + int LineNumber, StringRef File); + std::string emitInfo(const CXXMethodDecl *D, const FullComment *FC, + int LineNumber, StringRef File); + + private: + template + std::string serialize(T &I); + + template + void populateInfo(Info &I, const T *D, const FullComment *C); + template + void populateSymbolInfo(SymbolInfo &I, const T *D, const FullComment 
*C, + int LineNumber, StringRef Filename); + void populateFunctionInfo(FunctionInfo &I, const FunctionDecl *D, + const FullComment *C, int LineNumber, + StringRef Filename); + + void parseFields(RecordInfo &I, const RecordDecl *D) const; + void parseEnumerators(EnumInfo &I, const EnumDecl *D) const; + void parseBases(RecordInfo &I, const CXXRecordDecl *D) const; + void parseParameters(FunctionInfo &I, const FunctionDecl *D) const; + void parseFullComment(const FullComment *C, CommentInfo &CI); + + template + void populateParentNamespaces(llvm::SmallVector &Namespaces, + const T *D); + std::string getUSRForType(const Type *T) const; + std::string getUSRForDecl(const Decl *D) const; + RecordDecl *getDeclForType(const QualType &T) const; + + bool OmitFilenames; + }; + + template + bool mapDecl(const T *D); + + int getLine(const NamedDecl *D, const ASTContext &Context) const; + StringRef getFile(const NamedDecl *D, const ASTContext &Context) const; + comments::FullComment *getComment(const NamedDecl *D, + const ASTContext &Context) const; + + ExecutionContext *ECtx; + ClangDocSerializer Serializer; +}; + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_MAPPER_H Index: clang-doc/Mapper.cpp =================================================================== --- /dev/null +++ clang-doc/Mapper.cpp @@ -0,0 +1,370 @@ +//===-- Mapper.cpp - ClangDoc Mapper ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Mapper.h" +#include "BitcodeWriter.h" +#include "clang/AST/Comment.h" +#include "clang/AST/Mangle.h" +#include "clang/Index/USRGeneration.h" + +using clang::comments::FullComment; + +namespace clang { +namespace doc { + +// ClangDocMapper::ClangDocSerializer + +std::string ClangDocMapper::ClangDocSerializer::emitInfo(const NamespaceDecl *D, + const FullComment *FC, + int LineNumber, + llvm::StringRef File) { + NamespaceInfo I; + populateInfo(I, D, FC); + return serialize(I); +} + +std::string ClangDocMapper::ClangDocSerializer::emitInfo(const RecordDecl *D, + const FullComment *FC, + int LineNumber, + llvm::StringRef File) { + RecordInfo I; + populateSymbolInfo(I, D, FC, LineNumber, File); + I.TagType = D->getTagKind(); + parseFields(I, D); + return serialize(I); +} + +std::string ClangDocMapper::ClangDocSerializer::emitInfo(const FunctionDecl *D, + const FullComment *FC, + int LineNumber, + llvm::StringRef File) { + FunctionInfo I; + populateFunctionInfo(I, D, FC, LineNumber, File); + I.Access = clang::AccessSpecifier::AS_none; + return serialize(I); +} + +std::string ClangDocMapper::ClangDocSerializer::emitInfo(const CXXMethodDecl *D, + const FullComment *FC, + int LineNumber, + llvm::StringRef File) { + FunctionInfo I; + populateFunctionInfo(I, D, FC, LineNumber, File); + I.Parent.USR = getUSRForDecl(D->getParent()); + I.Access = D->getAccess(); + return serialize(I); +} + +std::string ClangDocMapper::ClangDocSerializer::emitInfo(const EnumDecl *D, + const FullComment *FC, + int LineNumber, + llvm::StringRef File) { + EnumInfo I; + populateSymbolInfo(I, D, FC, LineNumber, File); + I.Scoped = D->isScoped(); + parseEnumerators(I, D); + return serialize(I); +} + +template +std::string ClangDocMapper::ClangDocSerializer::serialize(T &I) { + SmallString<2048> Buffer; + llvm::BitstreamWriter Stream(Buffer); + ClangDocBitcodeWriter Writer(Stream, OmitFilenames); + 
Writer.writeBitstream(I, /*WriteBlockInfo=*/true); + return Buffer.str().str(); +} + +void ClangDocMapper::ClangDocSerializer::parseFullComment(const FullComment *C, + CommentInfo &CI) { + ClangDocCommentVisitor Visitor(CI); + Visitor.parseComment(C); +} + +template +void ClangDocMapper::ClangDocSerializer::populateInfo(Info &I, const T *D, + const FullComment *C) { + I.USR = getUSRForDecl(D); + I.Name = D->getNameAsString(); + populateParentNamespaces(I.Namespace, D); + if (C) { + I.Description.emplace_back(); + parseFullComment(C, I.Description.back()); + } +} + +template +void ClangDocMapper::ClangDocSerializer::populateSymbolInfo( + SymbolInfo &I, const T *D, const FullComment *C, int LineNumber, + StringRef Filename) { + populateInfo(I, D, C); + if (D->isThisDeclarationADefinition()) { + I.IsDefinition = true; + I.DefLoc.LineNumber = LineNumber; + I.DefLoc.Filename = Filename; + } else + I.Loc.emplace_back(LineNumber, Filename); +} + +void ClangDocMapper::ClangDocSerializer::populateFunctionInfo( + FunctionInfo &I, const FunctionDecl *D, const FullComment *FC, + int LineNumber, StringRef Filename) { + populateSymbolInfo(I, D, FC, LineNumber, Filename); + if (const auto *T = getDeclForType(D->getReturnType())) + I.ReturnType.Type.USR = getUSRForDecl(T); + else + I.ReturnType.Type.USR = D->getReturnType().getAsString(); + // TODO: Populate return type comment description from info description. + parseParameters(I, D); +} + +void ClangDocMapper::ClangDocSerializer::parseFields( + RecordInfo &I, const RecordDecl *D) const { + for (const FieldDecl *F : D->fields()) { + // FIXME: Set Access to the appropriate value. + std::string Type; + if (const auto *D = getDeclForType(F->getTypeSourceInfo()->getType())) + Type = getUSRForDecl(D); + else + Type = F->getTypeSourceInfo()->getType().getAsString(); + I.Members.emplace_back(Type, F->getQualifiedNameAsString()); + } + // TODO: Populate type comment description from info description. 
+}
+
+// Records the qualified name of every enumerator of D in I.Members.
+void ClangDocMapper::ClangDocSerializer::parseEnumerators(
+    EnumInfo &I, const EnumDecl *D) const {
+  for (const EnumConstantDecl *E : D->enumerators())
+    I.Members.emplace_back(E->getQualifiedNameAsString());
+  // TODO: Populate member comment description from info description.
+}
+
+// Appends one entry to I.Params for each parameter of D. The type is stored
+// as the USR of the declaration returned by getDeclForType() when there is
+// one, and as the printed type otherwise.
+void ClangDocMapper::ClangDocSerializer::parseParameters(
+    FunctionInfo &I, const FunctionDecl *D) const {
+  for (const ParmVarDecl *P : D->parameters()) {
+    std::string Type;
+    if (const auto *D = getDeclForType(P->getOriginalType()))
+      Type = getUSRForDecl(D);
+    else
+      Type = P->getOriginalType().getAsString();
+    I.Params.emplace_back(Type, P->getQualifiedNameAsString());
+    // TODO: Populate field comment description from info description.
+  }
+}
+
+// Records the bases of D: non-virtual bases in I.Parents and virtual bases in
+// I.VirtualParents. Each base is stored as the USR of the declaration
+// returned by getDeclForType() when there is one, and as the printed type
+// otherwise.
+void ClangDocMapper::ClangDocSerializer::parseBases(
+    RecordInfo &I, const CXXRecordDecl *D) const {
+  for (const CXXBaseSpecifier &B : D->bases()) {
+    // Virtual bases are collected by the vbases() loop below.
+    if (B.isVirtual()) continue;
+    if (const auto *P = getDeclForType(B.getType()))
+      I.Parents.emplace_back(getUSRForDecl(P));
+    else
+      I.Parents.emplace_back(B.getType().getAsString());
+  }
+  // A virtual base stays a virtual parent whether or not a declaration is
+  // found for it, so both branches append to I.VirtualParents.
+  for (const CXXBaseSpecifier &B : D->vbases()) {
+    if (const auto *P = getDeclForType(B.getType()))
+      I.VirtualParents.emplace_back(getUSRForDecl(P));
+    else
+      I.VirtualParents.emplace_back(B.getType().getAsString());
+  }
+}
+
+// Returns the definition of the declaration behind T, or nullptr when the
+// getAs() cast of T fails.
+RecordDecl *ClangDocMapper::ClangDocSerializer::getDeclForType(
+    const QualType &T) const {
+  auto *Ty = T->getAs();
+  if (!Ty) return nullptr;
+  return Ty->getDecl()->getDefinition();
+}
+
+// Appends the USR of each namespace enclosing D to Namespaces, innermost
+// first. The walk stops at the first parent that is not a namespace.
+template
+void ClangDocMapper::ClangDocSerializer::populateParentNamespaces(
+    llvm::SmallVector &Namespaces, const T *D) {
+  // Starts out null so that the walk below never begins from an
+  // indeterminate pointer.
+  const NamespaceDecl *N = nullptr;
+  // Get the initial namespace parent from passed-in decl.
+  if (const auto *C = dyn_cast(D)) {
+    N = dyn_cast(C->getParent());
+    if (!N) return;
+    Namespaces.emplace_back(getUSRForDecl(N));
+  }
+  // The cast of D failed, so there is no parent to start from; dyn_cast must
+  // not be handed a null pointer either.
+  if (!N) return;
+
+  // Get all parents until the top level namespace.
+ while (const auto *C = dyn_cast(N)) { + N = dyn_cast(C->getParent()); + if (!N) break; + Namespaces.emplace_back(getUSRForDecl(N)); + } +} + +std::string ClangDocMapper::ClangDocSerializer::getUSRForDecl( + const Decl *D) const { + llvm::SmallString<128> USR; + if (index::generateUSRForDecl(D, USR)) return ""; + return USR.str().str(); +} + +// ClangDocCommentVisitor + +void ClangDocMapper::ClangDocCommentVisitor::parseComment( + const comments::Comment *C) { + CurrentCI.Kind = C->getCommentKindName(); + ConstCommentVisitor::visit(C); + for (comments::Comment *Child : + llvm::make_range(C->child_begin(), C->child_end())) { + CurrentCI.Children.emplace_back(llvm::make_unique()); + ClangDocCommentVisitor Visitor(*CurrentCI.Children.back()); + Visitor.parseComment(Child); + } +} + +void ClangDocMapper::ClangDocCommentVisitor::visitTextComment( + const TextComment *C) { + if (!isWhitespaceOnly(C->getText())) CurrentCI.Text = C->getText(); +} + +void ClangDocMapper::ClangDocCommentVisitor::visitInlineCommandComment( + const InlineCommandComment *C) { + CurrentCI.Name = getCommandName(C->getCommandID()); + for (unsigned I = 0, E = C->getNumArgs(); I != E; ++I) + CurrentCI.Args.push_back(C->getArgText(I)); +} + +void ClangDocMapper::ClangDocCommentVisitor::visitHTMLStartTagComment( + const HTMLStartTagComment *C) { + CurrentCI.Name = C->getTagName(); + CurrentCI.SelfClosing = C->isSelfClosing(); + for (unsigned I = 0, E = C->getNumAttrs(); I < E; ++I) { + const HTMLStartTagComment::Attribute &Attr = C->getAttr(I); + CurrentCI.AttrKeys.push_back(Attr.Name); + CurrentCI.AttrValues.push_back(Attr.Value); + } +} + +void ClangDocMapper::ClangDocCommentVisitor::visitHTMLEndTagComment( + const HTMLEndTagComment *C) { + CurrentCI.Name = C->getTagName(); + CurrentCI.SelfClosing = true; +} + +void ClangDocMapper::ClangDocCommentVisitor::visitBlockCommandComment( + const BlockCommandComment *C) { + CurrentCI.Name = getCommandName(C->getCommandID()); + for (unsigned i = 0, e = 
C->getNumArgs(); i < e; ++i) + CurrentCI.Args.push_back(C->getArgText(i)); +} + +void ClangDocMapper::ClangDocCommentVisitor::visitParamCommandComment( + const ParamCommandComment *C) { + CurrentCI.Direction = + ParamCommandComment::getDirectionAsString(C->getDirection()); + CurrentCI.Explicit = C->isDirectionExplicit(); + if (C->hasParamName() && C->isParamIndexValid()) + CurrentCI.ParamName = C->getParamNameAsWritten(); +} + +void ClangDocMapper::ClangDocCommentVisitor::visitTParamCommandComment( + const TParamCommandComment *C) { + if (C->hasParamName() && C->isPositionValid()) + CurrentCI.ParamName = C->getParamNameAsWritten(); + + if (C->isPositionValid()) { + for (unsigned i = 0, e = C->getDepth(); i < e; ++i) + CurrentCI.Position.push_back(std::to_string(C->getIndex(i))); + } +} + +void ClangDocMapper::ClangDocCommentVisitor::visitVerbatimBlockComment( + const VerbatimBlockComment *C) { + CurrentCI.Name = getCommandName(C->getCommandID()); + CurrentCI.CloseName = C->getCloseName(); +} + +void ClangDocMapper::ClangDocCommentVisitor::visitVerbatimBlockLineComment( + const VerbatimBlockLineComment *C) { + if (!isWhitespaceOnly(C->getText())) CurrentCI.Text = C->getText(); +} + +void ClangDocMapper::ClangDocCommentVisitor::visitVerbatimLineComment( + const VerbatimLineComment *C) { + if (!isWhitespaceOnly(C->getText())) CurrentCI.Text = C->getText(); +} + +std::string ClangDocMapper::ClangDocCommentVisitor::getCommandName( + unsigned CommandID) const { + const CommandInfo *Info = CommandTraits::getBuiltinCommandInfo(CommandID); + if (Info) return Info->Name; + // TODO: Add parsing for \file command. 
+  return "";
+}
+
+// Returns true when S is empty or every character of S satisfies isspace().
+bool ClangDocMapper::ClangDocCommentVisitor::isWhitespaceOnly(
+    llvm::StringRef S) const {
+  // isspace() has undefined behavior for negative arguments other than EOF,
+  // and plain char may be negative for non-ASCII bytes, so each character is
+  // converted to unsigned char before the call.
+  return std::all_of(S.begin(), S.end(),
+                     [](unsigned char C) { return isspace(C) != 0; });
+}
+
+// ClangDocMapper
+
+// Serializes D and reports the result to the execution context, keyed by the
+// USR of D. Declarations that are not written in the main file, or for which
+// generateUSRForDecl() reports failure, are skipped.
+//
+// NOTE(review): skipped declarations return true on the assumption that the
+// Visit* results follow the RecursiveASTVisitor convention, where false
+// aborts the whole traversal -- confirm against the class declaration in
+// Mapper.h. VisitFunctionDecl below already returns true for the decls it
+// skips.
+template
+bool ClangDocMapper::mapDecl(const T *D) {
+  if (!D->getASTContext().getSourceManager().isWrittenInMainFile(
+          D->getLocation()))
+    return true;
+
+  llvm::SmallString<128> USR;
+  if (index::generateUSRForDecl(D, USR)) return true;
+
+  ECtx->reportResult(USR,
+                     Serializer.emitInfo(D, getComment(D, D->getASTContext()),
+                                         getLine(D, D->getASTContext()),
+                                         getFile(D, D->getASTContext())));
+  return true;
+}
+
+// The Visit* callbacks below forward the declaration to mapDecl().
+bool ClangDocMapper::VisitNamespaceDecl(const NamespaceDecl *D) {
+  return mapDecl(D);
+}
+
+bool ClangDocMapper::VisitRecordDecl(const RecordDecl *D) { return mapDecl(D); }
+
+bool ClangDocMapper::VisitEnumDecl(const EnumDecl *D) { return mapDecl(D); }
+
+bool ClangDocMapper::VisitCXXMethodDecl(const CXXMethodDecl *D) {
+  return mapDecl(D);
+}
+
+bool ClangDocMapper::VisitFunctionDecl(const FunctionDecl *D) {
+  // Don't visit CXXMethodDecls twice
+  if (dyn_cast(D)) return true;
+  return mapDecl(D);
+}
+
+// Returns the parsed comment for D, or nullptr when D has no raw comment.
+comments::FullComment *ClangDocMapper::getComment(
+    const NamedDecl *D, const ASTContext &Context) const {
+  RawComment *Comment = Context.getRawCommentForDeclNoCache(D);
+  // FIXME: Move setAttached to the initial comment parsing.
+ if (Comment) { + Comment->setAttached(); + return Comment->parse(Context, nullptr, D); + } + return nullptr; +} + +int ClangDocMapper::getLine(const NamedDecl *D, + const ASTContext &Context) const { + return Context.getSourceManager().getPresumedLoc(D->getLocStart()).getLine(); +} + +llvm::StringRef ClangDocMapper::getFile(const NamedDecl *D, + const ASTContext &Context) const { + return Context.getSourceManager() + .getPresumedLoc(D->getLocStart()) + .getFilename(); +} + +} // namespace doc +} // namespace clang Index: clang-doc/Representation.h =================================================================== --- /dev/null +++ clang-doc/Representation.h @@ -0,0 +1,144 @@ +///===-- Representation.h - ClangDoc Represenation --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the internal representations of different declaration +// types for the clang-doc tool. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REPRESENTATION_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REPRESENTATION_H + +#include +#include "clang/AST/Type.h" +#include "clang/Basic/Specifiers.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/SHA1.h" + +namespace clang { +namespace doc { + +struct Info; + +// A representation of a parsed comment. 
+struct CommentInfo {
+  CommentInfo() = default;
+  // Defaulted so that a move carries every field. std::vector moves its
+  // elements through this constructor when it reallocates, so a constructor
+  // that transfers only Children would drop the remaining fields of the
+  // moved elements.
+  CommentInfo(CommentInfo &&Other) = default;
+
+  std::string Kind;       // Kind name of the comment node.
+  std::string Text;       // Text of text and verbatim comments.
+  std::string Name;       // Command name or HTML tag name.
+  std::string Direction;  // Parameter direction of a param command.
+  std::string ParamName;  // Parameter name of a (t)param command.
+  std::string CloseName;  // Closing command name of a verbatim block.
+  bool SelfClosing = false;  // Set for HTML tags.
+  bool Explicit = false;     // Whether the param direction was explicit.
+  llvm::SmallVector AttrKeys;    // HTML attribute names; parallel to
+  llvm::SmallVector AttrValues;  // the HTML attribute values.
+  llvm::SmallVector Args;        // Arguments of a command comment.
+  llvm::SmallVector Position;    // Template parameter position indices.
+  std::vector> Children;         // Child comments.
+};
+
+struct Reference {
+  Reference() = default;
+  Reference(StringRef USR) : USR(USR) {}
+
+  std::string USR;
+  // Null unless explicitly set, so it is never read as an indeterminate
+  // pointer.
+  Info *Ref = nullptr;
+};
+
+// TODO: Pull the CommentInfo for a type out of the info's CommentInfo.
+// A base struct for TypeInfos
+struct TypeInfo {
+  TypeInfo() = default;
+  TypeInfo(llvm::StringRef Type) : Type(Type) {}
+
+  Reference Type;
+  std::vector Description;
+};
+
+// Info for field types.
+struct FieldTypeInfo : public TypeInfo {
+  FieldTypeInfo() = default;
+  FieldTypeInfo(llvm::StringRef Type) : TypeInfo(Type) {}
+  FieldTypeInfo(llvm::StringRef Type, llvm::StringRef Name)
+      : TypeInfo(Type), Name(Name) {}
+
+  std::string Name;
+};
+
+// Info for member types.
+struct MemberTypeInfo : public FieldTypeInfo {
+  MemberTypeInfo() = default;
+  MemberTypeInfo(llvm::StringRef Type, llvm::StringRef Name)
+      : FieldTypeInfo(Type, Name) {}
+
+  AccessSpecifier Access = clang::AccessSpecifier::AS_none;
+};
+
+// A line number and filename pair.
+struct Location {
+  Location() = default;
+  Location(int LineNumber, std::string Filename)
+      : LineNumber(LineNumber), Filename(std::move(Filename)) {}
+
+  // Zero until set, so a default-constructed Location never holds an
+  // indeterminate value.
+  int LineNumber = 0;
+  std::string Filename;
+};
+
+/// A base struct for Infos.
+struct Info {
+  std::string USR;
+  std::string Name;
+  llvm::SmallVector Namespace;
+  std::vector Description;
+};
+
+struct NamespaceInfo : public Info {};
+
+struct SymbolInfo : public Info {
+  bool IsDefinition = false;
+  Location DefLoc;
+  llvm::SmallVector Loc;
+};
+
+// TODO: Expand to allow for documenting templating and default args.
+// Info for functions.
+struct FunctionInfo : public SymbolInfo { + Reference Parent; + TypeInfo ReturnType; + llvm::SmallVector Params; + AccessSpecifier Access; +}; + +// TODO: Expand to allow for documenting templating, inheritance access, +// friend classes +// Info for types. +struct RecordInfo : public SymbolInfo { + TagTypeKind TagType; + llvm::SmallVector Members; + llvm::SmallVector Parents; + llvm::SmallVector VirtualParents; +}; + +// TODO: Expand to allow for documenting templating. +// Info for types. +struct EnumInfo : public SymbolInfo { + bool Scoped; + llvm::SmallVector Members; +}; + +// TODO: Add functionality to include separate markdown pages. + +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_CLANG_DOC_REPRESENTATION_H Index: clang-doc/tool/CMakeLists.txt =================================================================== --- /dev/null +++ clang-doc/tool/CMakeLists.txt @@ -0,0 +1,16 @@ +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/..) + +add_clang_executable(clang-doc + ClangDocMain.cpp + ) + +target_link_libraries(clang-doc + PRIVATE + clangAST + clangASTMatchers + clangBasic + clangFrontend + clangDoc + clangTooling + clangToolingCore + ) \ No newline at end of file Index: clang-doc/tool/ClangDocMain.cpp =================================================================== --- /dev/null +++ clang-doc/tool/ClangDocMain.cpp @@ -0,0 +1,115 @@ +//===-- ClangDocMain.cpp - ClangDoc -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool for generating C and C++ documenation from source code +// and comments. 
Generally, it runs a LibTooling FrontendAction on source files, +// mapping each declaration in those files to its USR and serializing relevant +// information into LLVM bitcode. It then runs a pass over the collected +// declaration information, reducing by USR. There is an option to dump this +// intermediate result to bitcode. Finally, it hands the reduced information +// off to a generator, which does the final parsing from the intermediate +// representation to the desired output format. +// +//===----------------------------------------------------------------------===// + +#include +#include "ClangDoc.h" +#include "clang/AST/AST.h" +#include "clang/AST/Decl.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchersInternal.h" +#include "clang/Driver/Options.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Execution.h" +#include "clang/Tooling/StandaloneExecution.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang::ast_matchers; +using namespace clang::tooling; +using namespace clang; + +static llvm::cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage); +static llvm::cl::OptionCategory ClangDocCategory("clang-doc options"); + +static llvm::cl::opt OutDirectory( + "output", llvm::cl::desc("Directory for outputting generated files."), + llvm::cl::init("docs"), llvm::cl::cat(ClangDocCategory)); + +static llvm::cl::opt DumpResult( + "dump", llvm::cl::desc("Dump intermediate results to bitcode file."), + llvm::cl::init(false), llvm::cl::cat(ClangDocCategory)); + +static llvm::cl::opt OmitFilenames( + "omit-filenames", llvm::cl::desc("Omit filenames in output."), + llvm::cl::init(false), llvm::cl::cat(ClangDocCategory)); + +static 
llvm::cl::opt DoxygenOnly( + "doxygen", + llvm::cl::desc("Use only doxygen-style comments to generate docs."), + llvm::cl::init(false), llvm::cl::cat(ClangDocCategory)); + +int main(int argc, const char **argv) { + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + std::error_code OK; + + auto Exec = clang::tooling::createExecutorFromCommandLineArgs( + argc, argv, ClangDocCategory); + + if (!Exec) { + llvm::errs() << toString(Exec.takeError()) << "\n"; + return 1; + } + + ArgumentsAdjuster ArgAdjuster; + if (!DoxygenOnly) + ArgAdjuster = combineAdjusters( + getInsertArgumentAdjuster("-fparse-all-comments", + tooling::ArgumentInsertPosition::BEGIN), + ArgAdjuster); + + // Mapping phase + llvm::outs() << "Mapping decls...\n"; + auto Err = Exec->get()->execute( + llvm::make_unique( + Exec->get()->getExecutionContext(), OmitFilenames), + ArgAdjuster); + if (Err) llvm::errs() << toString(std::move(Err)) << "\n"; + + if (DumpResult) { + Exec->get()->getToolResults()->forEachResult([&](StringRef Key, + StringRef Value) { + SmallString<128> IRRootPath; + llvm::sys::path::native(OutDirectory, IRRootPath); + std::error_code DirectoryStatus = + llvm::sys::fs::create_directories(IRRootPath); + if (DirectoryStatus != OK) { + llvm::errs() << "Unable to create documentation directories.\n"; + return; + } + llvm::sys::path::append(IRRootPath, Key + ".bc"); + std::error_code OutErrorInfo; + llvm::raw_fd_ostream OS(IRRootPath, OutErrorInfo, llvm::sys::fs::F_None); + if (OutErrorInfo != OK) { + llvm::errs() << "Error opening documentation file.\n"; + return; + } + OS << Value; + OS.close(); + }); + } + + return 0; +} Index: docs/clang-doc.rst =================================================================== --- /dev/null +++ docs/clang-doc.rst @@ -0,0 +1,62 @@ +=================== +Clang-Doc +=================== + +.. contents:: + +:program:`clang-doc` is a tool for generating C and C++ documenation from +source code and comments. 
+ +The tool is in a very early development stage, so you might encounter bugs and +crashes. Submitting reports with information about how to reproduce the issue +to `the LLVM bugtracker <https://bugs.llvm.org/>`_ will definitely help the +project. If you have any ideas or suggestions, please file a feature request +there. + +Use +===== + +:program:`clang-doc` is a `LibTooling +<https://clang.llvm.org/docs/LibTooling.html>`_-based tool, and so requires a +compile command database for your project (for an example of how to do this +see `How To Setup Tooling For LLVM +<https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html>`_). + +The tool can be used on a single file or multiple files as defined in +the compile commands database: + +.. code-block:: console + + $ clang-doc /path/to/file.cpp -p /path/to/compile/commands + +This generates an intermediate representation of the declarations and their +associated information in the specified TUs, serialized to LLVM bitcode. + +As currently implemented, the tool is only able to parse TUs that can be +stored in-memory. Future additions will extend the current framework to use +map-reduce frameworks to allow for use with large codebases. + +:program:`clang-doc` offers the following options: + +.. code-block:: console + + $ clang-doc --help +USAGE: clang-doc [options] <source0> [... <sourceN>] + +OPTIONS: + +Generic Options: + + -help - Display available options (-help-hidden for more) + -help-list - Display list of available options (-help-list-hidden for more) + -version - Display the version of this program + +clang-doc options: + + -doxygen - Use only doxygen-style comments to generate docs. + -dump - Dump intermediate results to bitcode file. + -extra-arg=<string> - Additional argument to append to the compiler command line + -extra-arg-before=<string> - Additional argument to prepend to the compiler command line + -omit-filenames - Omit filenames in output. + -output=<string> - Directory for outputting generated files. 
+ -p= - Build path Index: test/CMakeLists.txt =================================================================== --- test/CMakeLists.txt +++ test/CMakeLists.txt @@ -41,6 +41,7 @@ clang-apply-replacements clang-change-namespace clangd + clang-doc clang-include-fixer clang-move clang-query Index: test/clang-doc/mapper-class.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-class.cpp @@ -0,0 +1,21 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/c:@S@E.bc --dump | FileCheck %s + +class E {}; +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@S@E' + // CHECK: blob data = 'E' + // CHECK: + // CHECK: +// CHECK: + + + Index: test/clang-doc/mapper-enum.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-enum.cpp @@ -0,0 +1,27 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/c:@E@B.bc --dump | FileCheck %s + +enum B { X, Y }; +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@E@B' + // CHECK: blob data = 'B' + // CHECK: + // CHECK: + // CHECK: blob data = 'X' + // CHECK: + // CHECK: + // CHECK: blob data = 'Y' + // CHECK: +// CHECK: + + + + Index: test/clang-doc/mapper-function.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-function.cpp @@ -0,0 +1,29 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer 
%t/docs/c:@F@F#I#.bc --dump | FileCheck %s + +int F(int param) { return param; } +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@F@F#I#' + // CHECK: blob data = 'F' + // CHECK: + // CHECK: + // CHECK: blob data = 'int' + // CHECK: + // CHECK: + // CHECK: blob data = 'int' + // CHECK: blob data = 'param' + // CHECK: +// CHECK: + + + + + Index: test/clang-doc/mapper-method.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-method.cpp @@ -0,0 +1,33 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/c:@S@G@F@Method#I#.bc --dump | FileCheck %s + +class G { +public: + int Method(int param) { return param; } +}; +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@S@G@F@Method#I#' + // CHECK: blob data = 'Method' + // CHECK: + // CHECK: blob data = 'c:@S@G' + // CHECK: + // CHECK: blob data = 'int' + // CHECK: + // CHECK: + // CHECK: blob data = 'int' + // CHECK: blob data = 'param' + // CHECK: +// CHECK: + + + + + Index: test/clang-doc/mapper-namespace.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-namespace.cpp @@ -0,0 +1,19 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/c:@N@A.bc --dump | FileCheck %s + +namespace A {} +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@N@A' + // CHECK: blob data = 'A' +// CHECK: + + + Index: test/clang-doc/mapper-struct.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-struct.cpp @@ -0,0 +1,26 @@ 
+// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/c:@S@C.bc --dump | FileCheck %s + +struct C { int i; }; +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@S@C' + // CHECK: blob data = 'C' + // CHECK: + // CHECK: + // CHECK: blob data = 'int' + // CHECK: blob data = 'C::i' + // CHECK: + // CHECK: +// CHECK: + + + + Index: test/clang-doc/mapper-union.cpp =================================================================== --- /dev/null +++ test/clang-doc/mapper-union.cpp @@ -0,0 +1,32 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: echo "" > %t/compile_flags.txt +// RUN: cp "%s" "%t/test.cpp" +// RUN: clang-doc --dump --omit-filenames -doxygen -p %t %t/test.cpp -output=%t/docs +// RUN: llvm-bcanalyzer %t/docs/c:@U@D.bc --dump | FileCheck %s + +union D { int X; int Y; }; +// CHECK: +// CHECK: + // CHECK: +// CHECK: +// CHECK: + // CHECK: blob data = 'c:@U@D' + // CHECK: blob data = 'D' + // CHECK: + // CHECK: + // CHECK: + // CHECK: blob data = 'int' + // CHECK: blob data = 'D::X' + // CHECK: + // CHECK: + // CHECK: + // CHECK: blob data = 'int' + // CHECK: blob data = 'D::Y' + // CHECK: + // CHECK: +// CHECK: + + + +