Index: include/clang/Index/IndexDataStoreSymbolUtils.h =================================================================== --- /dev/null +++ include/clang/Index/IndexDataStoreSymbolUtils.h @@ -0,0 +1,37 @@ +//===--- IndexDataStoreSymbolUtils.h - Utilities for indexstore symbols ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INDEX_INDEXDATASTORESYMBOLUTILS_H +#define LLVM_CLANG_INDEX_INDEXDATASTORESYMBOLUTILS_H + +#include "indexstore/indexstore.h" +#include "clang/Index/IndexSymbol.h" + +namespace clang { +namespace index { + +/// Map a SymbolKind to an indexstore_symbol_kind_t. +indexstore_symbol_kind_t getIndexStoreKind(SymbolKind K); + +/// Map a SymbolSubKind to an indexstore_symbol_subkind_t. +indexstore_symbol_subkind_t getIndexStoreSubKind(SymbolSubKind K); + +/// Map a SymbolLanguage to an indexstore_symbol_language_t. +indexstore_symbol_language_t getIndexStoreLang(SymbolLanguage L); + +/// Map a SymbolPropertySet to its uint64_t indexstore representation. +uint64_t getIndexStoreProperties(SymbolPropertySet Props); + +/// Map a SymbolRoleSet to its uint64_t indexstore representation. +uint64_t getIndexStoreRoles(SymbolRoleSet Roles); + +} // end namespace index +} // end namespace clang + +#endif // LLVM_CLANG_INDEX_INDEXDATASTORESYMBOLUTILS_H Index: include/clang/Index/IndexRecordWriter.h =================================================================== --- /dev/null +++ include/clang/Index/IndexRecordWriter.h @@ -0,0 +1,108 @@ +//===--- IndexRecordWriter.h - Index record serialization -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INDEX_INDEXRECORDWRITER_H +#define LLVM_CLANG_INDEX_INDEXRECORDWRITER_H + +#include "clang/Index/IndexSymbol.h" +#include "llvm/ADT/SmallString.h" + +namespace clang { +namespace index { + +namespace writer { +/// An opaque pointer to a declaration or other symbol used by the +/// IndexRecordWriter to identify when two occurrences refer to the same symbol, +/// and as a token for getting information about a symbol from the caller. +typedef const void *OpaqueDecl; + +/// An indexer symbol suitable for serialization. +/// +/// This includes all the information about the symbol that will be serialized +/// except for roles, which are synthesized by looking at all the occurrences. +/// +/// \seealso IndexRecordDecl +/// \note this struct is generally accompanied by a buffer that owns the string +/// storage. It should not be stored permanently. +struct Symbol { + SymbolInfo SymInfo; + StringRef Name; + StringRef USR; + StringRef CodeGenName; +}; + +/// A relation to an opaque symbol. +/// \seealso IndexRecordRelation +struct SymbolRelation { + OpaqueDecl RelatedSymbol; + SymbolRoleSet Roles; +}; + +typedef llvm::function_ref &Scratch)> + SymbolWriterCallback; +} // end namespace writer + +/// A language-independent utility for serializing index record files. +/// +/// Internally, this class is a small state machine. Users should first call +/// beginRecord, and if the file does not already exist, then proceed to add +/// all symbol occurrences (addOccurrence) and finally finish with endRecord. +class IndexRecordWriter { + struct RecordState; + + /// The records directory path. + SmallString<64> RecordsPath; + /// The state of the current record.
+ std::unique_ptr Record; + +public: + IndexRecordWriter(StringRef IndexPath); + ~IndexRecordWriter(); + + enum class Result { + Success, + Failure, + AlreadyExists, + }; + + /// Begin writing a record for the file \p Filename with contents uniquely + /// identified by \p RecordHash. + /// + /// \param Filename the name of the file this is a record for. + /// \param RecordHash the unique hash of the record contents. + /// \param Error on failure, set to the error message. + /// \param RecordFile if non-null, this is set to the name of the record file. + /// + /// \returns Success if we should continue writing this record, AlreadyExists + /// if the record file has already been written, or Failure if there was an + /// error, in which case \p Error will be set. + Result beginRecord(StringRef Filename, llvm::hash_code RecordHash, + std::string &Error, std::string *RecordFile = nullptr); + + /// Finish writing the record file. + /// + /// \param Error on failure, set to the error message. + /// \param GetSymbolForDecl a callback mapping a writer::OpaqueDecl to its + /// writer::Symbol. This is how the language-specific symbol information is + /// provided to the IndexRecordWriter. The scratch parameter can be used for + /// any necessary storage. + /// + /// \return Success, or Failure and sets \p Error. + Result endRecord(std::string &Error, + writer::SymbolWriterCallback GetSymbolForDecl); + + /// Add an occurrence of the symbol \p D with the given \p Roles and location.
+ void addOccurrence(writer::OpaqueDecl D, SymbolRoleSet Roles, unsigned Line, + unsigned Column, ArrayRef Related); +}; + +} // end namespace index +} // end namespace clang + +#endif // LLVM_CLANG_INDEX_INDEXRECORDWRITER_H Index: include/clang/Index/IndexUnitWriter.h =================================================================== --- /dev/null +++ include/clang/Index/IndexUnitWriter.h @@ -0,0 +1,141 @@ +//===--- IndexUnitWriter.h - Index unit serialization ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INDEX_INDEXUNITWRITER_H +#define LLVM_CLANG_INDEX_INDEXUNITWRITER_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallString.h" +#include +#include + +namespace llvm { +class BitstreamWriter; +} + +namespace clang { +class FileEntry; +class FileManager; + +namespace index { + +namespace writer { +/// An opaque pointer to a module used by the IndexUnitWriter to associate +/// record and file dependencies with a module, and as a token for getting +/// information about the module from the caller. +typedef const void *OpaqueModule; + +/// Module info suitable for serialization. +/// +/// This is used for top-level modules and sub-modules. +struct ModuleInfo { + /// Full, dot-separated, module name.
+ StringRef Name; +}; + +typedef llvm::function_ref &Scratch)> + ModuleInfoWriterCallback; +} // end namespace writer + +class IndexUnitWriter { + FileManager &FileMgr; + SmallString<64> UnitsPath; + std::string ProviderIdentifier; + std::string ProviderVersion; + std::string OutputFile; + std::string ModuleName; + const FileEntry *MainFile; + bool IsSystemUnit; + bool IsModuleUnit; + bool IsDebugCompilation; + std::string TargetTriple; + std::string WorkDir; + std::string SysrootPath; + std::function &Scratch)> + GetInfoForModuleFn; + struct FileInclude { + int Index; + unsigned Line; + }; + struct FileEntryData { + const FileEntry *File; + bool IsSystem; + int ModuleIndex; + std::vector Includes; + }; + std::vector Files; + std::vector Modules; + llvm::DenseMap IndexByFile; + llvm::DenseMap IndexByModule; + llvm::DenseSet SeenASTFiles; + struct RecordOrUnitData { + std::string Name; + int FileIndex; + int ModuleIndex; + bool IsSystem; + }; + std::vector Records; + std::vector ASTFileUnits; + +public: + /// \param MainFile the main file for a compiled source file. This should be + /// null for PCH and module units. + /// \param IsSystem true for system module units, false otherwise.
+ IndexUnitWriter(FileManager &FileMgr, StringRef StorePath, + StringRef ProviderIdentifier, StringRef ProviderVersion, + StringRef OutputFile, StringRef ModuleName, + const FileEntry *MainFile, bool IsSystem, bool IsModuleUnit, + bool IsDebugCompilation, StringRef TargetTriple, + StringRef SysrootPath, + writer::ModuleInfoWriterCallback GetInfoForModule); + ~IndexUnitWriter(); + + int addFileDependency(const FileEntry *File, bool IsSystem, + writer::OpaqueModule Mod); + void addRecordFile(StringRef RecordFile, const FileEntry *File, bool IsSystem, + writer::OpaqueModule Mod); + void addASTFileDependency(const FileEntry *File, bool IsSystem, + writer::OpaqueModule Mod, + bool withoutUnitName = false); + void addUnitDependency(StringRef UnitFile, const FileEntry *File, + bool IsSystem, writer::OpaqueModule Mod); + bool addInclude(const FileEntry *Source, unsigned Line, + const FileEntry *Target); + + bool write(std::string &Error); + + void getUnitNameForOutputFile(StringRef FilePath, SmallVectorImpl &Str); + void getUnitPathForOutputFile(StringRef FilePath, SmallVectorImpl &Str); + /// If the unit file exists and \p TimeCompareFilePath is provided, it will + /// return true if \p TimeCompareFilePath is older than the unit file.
+ Optional + isUnitUpToDateForOutputFile(StringRef FilePath, + Optional TimeCompareFilePath, + std::string &Error); + static void getUnitNameForAbsoluteOutputFile(StringRef FilePath, + SmallVectorImpl &Str); + static bool initIndexDirectory(StringRef StorePath, std::string &Error); + +private: + class PathStorage; + int addModule(writer::OpaqueModule Mod); + void writeUnitInfo(llvm::BitstreamWriter &Stream, PathStorage &PathStore); + void writeDependencies(llvm::BitstreamWriter &Stream, PathStorage &PathStore); + void writeIncludes(llvm::BitstreamWriter &Stream, PathStorage &PathStore); + void writePaths(llvm::BitstreamWriter &Stream, PathStorage &PathStore); + void writeModules(llvm::BitstreamWriter &Stream); +}; + +} // end namespace index +} // end namespace clang + +#endif // LLVM_CLANG_INDEX_INDEXUNITWRITER_H Index: include/clang/Index/IndexingAction.h =================================================================== --- include/clang/Index/IndexingAction.h +++ include/clang/Index/IndexingAction.h @@ -81,6 +81,13 @@ createIndexDataRecordingAction(const FrontendOptions &FEOpts, std::unique_ptr WrappedAction); +/// Checks if the unit file exists for the module file, if it doesn't it +/// generates index data for it. +/// +/// \returns true if the index data were generated, false otherwise. +bool emitIndexDataForModuleFile(const Module *Mod, const CompilerInstance &CI, + IndexUnitWriter &ParentUnitWriter); + } // namespace index } // namespace clang Index: include/indexstore/indexstore.h =================================================================== --- /dev/null +++ include/indexstore/indexstore.h @@ -0,0 +1,143 @@ +/*===-- indexstore/indexstore.h - Index Store C API ----------------- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details.
*| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header provides a C API for the index store. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_CLANG_C_INDEXSTORE_INDEXSTORE_H +#define LLVM_CLANG_C_INDEXSTORE_INDEXSTORE_H + +#include +#include +#include + +/** + * \brief The version constants for the Index Store C API. + * INDEXSTORE_VERSION_MINOR should increase when there are API additions. + * INDEXSTORE_VERSION_MAJOR is intended for "major" source/ABI breaking changes. + */ +#define INDEXSTORE_VERSION_MAJOR 0 +#define INDEXSTORE_VERSION_MINOR 9 + +#define INDEXSTORE_VERSION_ENCODE(major, minor) ( \ + ((major) * 10000) \ + + ((minor) * 1)) + +#define INDEXSTORE_VERSION INDEXSTORE_VERSION_ENCODE( \ + INDEXSTORE_VERSION_MAJOR, \ + INDEXSTORE_VERSION_MINOR ) + +#define INDEXSTORE_VERSION_STRINGIZE_(major, minor) \ + #major"."#minor +#define INDEXSTORE_VERSION_STRINGIZE(major, minor) \ + INDEXSTORE_VERSION_STRINGIZE_(major, minor) + +#define INDEXSTORE_VERSION_STRING INDEXSTORE_VERSION_STRINGIZE( \ + INDEXSTORE_VERSION_MAJOR, \ + INDEXSTORE_VERSION_MINOR) + +#ifdef __cplusplus +# define INDEXSTORE_BEGIN_DECLS extern "C" { +# define INDEXSTORE_END_DECLS } +#else +# define INDEXSTORE_BEGIN_DECLS +# define INDEXSTORE_END_DECLS +#endif + +INDEXSTORE_BEGIN_DECLS + +typedef enum { + INDEXSTORE_SYMBOL_KIND_UNKNOWN = 0, + INDEXSTORE_SYMBOL_KIND_MODULE = 1, + INDEXSTORE_SYMBOL_KIND_NAMESPACE = 2, + INDEXSTORE_SYMBOL_KIND_NAMESPACEALIAS = 3, + INDEXSTORE_SYMBOL_KIND_MACRO = 4, + INDEXSTORE_SYMBOL_KIND_ENUM = 5, + INDEXSTORE_SYMBOL_KIND_STRUCT = 6, + INDEXSTORE_SYMBOL_KIND_CLASS = 7, + INDEXSTORE_SYMBOL_KIND_PROTOCOL = 8, + INDEXSTORE_SYMBOL_KIND_EXTENSION = 9, + INDEXSTORE_SYMBOL_KIND_UNION = 10, + INDEXSTORE_SYMBOL_KIND_TYPEALIAS = 11, + INDEXSTORE_SYMBOL_KIND_FUNCTION = 12, + INDEXSTORE_SYMBOL_KIND_VARIABLE = 13, + INDEXSTORE_SYMBOL_KIND_FIELD = 14, +
INDEXSTORE_SYMBOL_KIND_ENUMCONSTANT = 15, + INDEXSTORE_SYMBOL_KIND_INSTANCEMETHOD = 16, + INDEXSTORE_SYMBOL_KIND_CLASSMETHOD = 17, + INDEXSTORE_SYMBOL_KIND_STATICMETHOD = 18, + INDEXSTORE_SYMBOL_KIND_INSTANCEPROPERTY = 19, + INDEXSTORE_SYMBOL_KIND_CLASSPROPERTY = 20, + INDEXSTORE_SYMBOL_KIND_STATICPROPERTY = 21, + INDEXSTORE_SYMBOL_KIND_CONSTRUCTOR = 22, + INDEXSTORE_SYMBOL_KIND_DESTRUCTOR = 23, + INDEXSTORE_SYMBOL_KIND_CONVERSIONFUNCTION = 24, + INDEXSTORE_SYMBOL_KIND_PARAMETER = 25, + INDEXSTORE_SYMBOL_KIND_USING = 26, + + INDEXSTORE_SYMBOL_KIND_COMMENTTAG = 1000, +} indexstore_symbol_kind_t; + +typedef enum { + INDEXSTORE_SYMBOL_SUBKIND_NONE = 0, + INDEXSTORE_SYMBOL_SUBKIND_CXXCOPYCONSTRUCTOR = 1, + INDEXSTORE_SYMBOL_SUBKIND_CXXMOVECONSTRUCTOR = 2, + INDEXSTORE_SYMBOL_SUBKIND_ACCESSORGETTER = 3, + INDEXSTORE_SYMBOL_SUBKIND_ACCESSORSETTER = 4, + INDEXSTORE_SYMBOL_SUBKIND_USINGTYPENAME = 5, + INDEXSTORE_SYMBOL_SUBKIND_USINGVALUE = 6, +} indexstore_symbol_subkind_t; + +typedef enum { + INDEXSTORE_SYMBOL_PROPERTY_GENERIC = 1 << 0, + INDEXSTORE_SYMBOL_PROPERTY_TEMPLATE_PARTIAL_SPECIALIZATION = 1 << 1, + INDEXSTORE_SYMBOL_PROPERTY_TEMPLATE_SPECIALIZATION = 1 << 2, + INDEXSTORE_SYMBOL_PROPERTY_UNITTEST = 1 << 3, + INDEXSTORE_SYMBOL_PROPERTY_IBANNOTATED = 1 << 4, + INDEXSTORE_SYMBOL_PROPERTY_IBOUTLETCOLLECTION = 1 << 5, + INDEXSTORE_SYMBOL_PROPERTY_GKINSPECTABLE = 1 << 6, + INDEXSTORE_SYMBOL_PROPERTY_LOCAL = 1 << 7, +} indexstore_symbol_property_t; + +typedef enum { + INDEXSTORE_SYMBOL_LANG_C = 0, + INDEXSTORE_SYMBOL_LANG_OBJC = 1, + INDEXSTORE_SYMBOL_LANG_CXX = 2, + + INDEXSTORE_SYMBOL_LANG_SWIFT = 100, +} indexstore_symbol_language_t; + +typedef enum { + INDEXSTORE_SYMBOL_ROLE_DECLARATION = 1 << 0, + INDEXSTORE_SYMBOL_ROLE_DEFINITION = 1 << 1, + INDEXSTORE_SYMBOL_ROLE_REFERENCE = 1 << 2, + INDEXSTORE_SYMBOL_ROLE_READ = 1 << 3, + INDEXSTORE_SYMBOL_ROLE_WRITE = 1 << 4, + INDEXSTORE_SYMBOL_ROLE_CALL = 1 << 5, + INDEXSTORE_SYMBOL_ROLE_DYNAMIC = 1 << 6, +
INDEXSTORE_SYMBOL_ROLE_ADDRESSOF = 1 << 7, + INDEXSTORE_SYMBOL_ROLE_IMPLICIT = 1 << 8, + + // Relation roles. + INDEXSTORE_SYMBOL_ROLE_REL_CHILDOF = 1 << 9, + INDEXSTORE_SYMBOL_ROLE_REL_BASEOF = 1 << 10, + INDEXSTORE_SYMBOL_ROLE_REL_OVERRIDEOF = 1 << 11, + INDEXSTORE_SYMBOL_ROLE_REL_RECEIVEDBY = 1 << 12, + INDEXSTORE_SYMBOL_ROLE_REL_CALLEDBY = 1 << 13, + INDEXSTORE_SYMBOL_ROLE_REL_EXTENDEDBY = 1 << 14, + INDEXSTORE_SYMBOL_ROLE_REL_ACCESSOROF = 1 << 15, + INDEXSTORE_SYMBOL_ROLE_REL_CONTAINEDBY = 1 << 16, + INDEXSTORE_SYMBOL_ROLE_REL_IBTYPEOF = 1 << 17, + INDEXSTORE_SYMBOL_ROLE_REL_SPECIALIZATIONOF = 1 << 18, +} indexstore_symbol_role_t; + +INDEXSTORE_END_DECLS + +#endif /* LLVM_CLANG_C_INDEXSTORE_INDEXSTORE_H */ Index: lib/Index/BitstreamVisitor.h =================================================================== --- /dev/null +++ lib/Index/BitstreamVisitor.h @@ -0,0 +1,155 @@ +//===--- BitstreamVisitor.h - Helper for reading a bitstream --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_INDEX_BITSTREAMVISITOR_H +#define LLVM_CLANG_LIB_INDEX_BITSTREAMVISITOR_H + +#include "clang/Basic/LLVM.h" +#include "clang/Serialization/ASTReader.h" +#include "llvm/Bitcode/BitstreamReader.h" +#include + +namespace clang { +namespace index { +namespace store { + +/// Helper class that saves the current stream position and +/// then restores it when destroyed.
+struct SavedStreamPosition { + explicit SavedStreamPosition(llvm::BitstreamCursor &Cursor) + : Cursor(Cursor), Offset(Cursor.GetCurrentBitNo()) {} + + ~SavedStreamPosition() { Cursor.JumpToBit(Offset); } + +private: + llvm::BitstreamCursor &Cursor; + uint64_t Offset; +}; + +enum class StreamVisit { Continue, Skip, Abort }; + +template class BitstreamVisitor { + SmallVector BlockStack; + +protected: + llvm::BitstreamCursor &Stream; + Optional BlockInfo; + std::string *Error; + +public: + BitstreamVisitor(llvm::BitstreamCursor &Stream) : Stream(Stream) {} + + StreamVisit visitBlock(unsigned ID) { return StreamVisit::Continue; } + + bool visit(std::string &Error) { + this->Error = &Error; + + ASTReader::RecordData Record; + while (1) { + llvm::BitstreamEntry Entry = + Stream.advance(llvm::BitstreamCursor::AF_DontPopBlockAtEnd); + + switch (Entry.Kind) { + case llvm::BitstreamEntry::Error: + Error = "malformed serialization"; + return false; + + case llvm::BitstreamEntry::EndBlock: + if (BlockStack.empty()) + return true; + BlockStack.pop_back(); + if (Stream.ReadBlockEnd()) { + Error = "malformed serialization"; + return false; + } + if (Stream.AtEndOfStream()) + return true; + break; + + case llvm::BitstreamEntry::SubBlock: { + if (Entry.ID == llvm::bitc::BLOCKINFO_BLOCK_ID) { + BlockInfo = Stream.ReadBlockInfoBlock(); + if (!BlockInfo) { + Error = "malformed BlockInfoBlock"; + return false; + } + Stream.setBlockInfo(&*BlockInfo); + break; + } + + StreamVisit Ret = static_cast(this)->visitBlock(Entry.ID); + switch (Ret) { + case StreamVisit::Continue: + if (Stream.EnterSubBlock(Entry.ID)) { + Error = "malformed block record"; + return false; + } + readBlockAbbrevs(Stream); + BlockStack.push_back(Entry.ID); + break; + + case StreamVisit::Skip: + if (Stream.SkipBlock()) { + Error = "malformed serialization"; + return false; + } + if (Stream.AtEndOfStream()) + return true; + break; + + case StreamVisit::Abort: + return false; + } + break; + } + + case 
llvm::BitstreamEntry::Record: { + Record.clear(); + StringRef Blob; + unsigned RecID = Stream.readRecord(Entry.ID, Record, &Blob); + unsigned BlockID = BlockStack.empty() ? 0 : BlockStack.back(); + StreamVisit Ret = static_cast(this)->visitRecord( + BlockID, RecID, Record, Blob); + switch (Ret) { + case StreamVisit::Continue: + break; + + case StreamVisit::Skip: + Stream.skipRecord(Entry.ID); + break; + + case StreamVisit::Abort: + return false; + } + break; + } + } + } + } + + static void readBlockAbbrevs(llvm::BitstreamCursor &Cursor) { + while (true) { + uint64_t Offset = Cursor.GetCurrentBitNo(); + unsigned Code = Cursor.ReadCode(); + + // We expect all abbrevs to be at the start of the block. + if (Code != llvm::bitc::DEFINE_ABBREV) { + Cursor.JumpToBit(Offset); + return; + } + Cursor.ReadAbbrevRecord(); + } + } +}; + +} // end namespace store +} // end namespace index +} // end namespace clang + +#endif Index: lib/Index/CMakeLists.txt =================================================================== --- lib/Index/CMakeLists.txt +++ lib/Index/CMakeLists.txt @@ -1,18 +1,25 @@ set(LLVM_LINK_COMPONENTS + BitReader Core Support ) add_clang_library(clangIndex + ClangIndexRecordWriter.cpp CodegenNameGenerator.cpp CommentToXML.cpp FileIndexRecord.cpp IndexBody.cpp + IndexDataStore.cpp + IndexDataStoreUtils.cpp IndexDecl.cpp IndexingAction.cpp IndexingContext.cpp + IndexRecordHasher.cpp + IndexRecordWriter.cpp IndexSymbol.cpp IndexTypeSourceInfo.cpp + IndexUnitWriter.cpp USRGeneration.cpp ADDITIONAL_HEADERS Index: lib/Index/ClangIndexRecordWriter.h =================================================================== --- /dev/null +++ lib/Index/ClangIndexRecordWriter.h @@ -0,0 +1,55 @@ +//===--- ClangIndexRecordWriter.h - Index record serialization ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_INDEX_CLANGINDEXRECORDWRITER_H +#define LLVM_CLANG_LIB_INDEX_CLANGINDEXRECORDWRITER_H + +#include "IndexRecordHasher.h" +#include "clang/Index/CodegenNameGenerator.h" +#include "clang/Index/IndexRecordWriter.h" +#include "clang/Index/IndexingAction.h" +#include "llvm/ADT/SmallString.h" + +namespace clang { +class ASTContext; +class Decl; + +namespace index { +class FileIndexRecord; + +class ClangIndexRecordWriter { + IndexRecordWriter Impl; + + ASTContext &Ctx; + RecordingOptions RecordOpts; + + std::unique_ptr CGNameGen; + llvm::BumpPtrAllocator Allocator; + llvm::DenseMap USRByDecl; + IndexRecordHasher Hasher; + +public: + ClangIndexRecordWriter(ASTContext &Ctx, RecordingOptions Opts); + ~ClangIndexRecordWriter(); + + ASTContext &getASTContext() { return Ctx; } + CodegenNameGenerator *getCGNameGen() { return CGNameGen.get(); } + + bool writeRecord(StringRef Filename, const FileIndexRecord &Record, + std::string &Error, std::string *RecordFile = nullptr); + StringRef getUSR(const Decl *D); + +private: + StringRef getUSRNonCached(const Decl *D); +}; + +} // end namespace index +} // end namespace clang + +#endif // LLVM_CLANG_LIB_INDEX_CLANGINDEXRECORDWRITER_H Index: lib/Index/ClangIndexRecordWriter.cpp =================================================================== --- /dev/null +++ lib/Index/ClangIndexRecordWriter.cpp @@ -0,0 +1,127 @@ +//===--- ClangIndexRecordWriter.cpp - Index record serialization ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "ClangIndexRecordWriter.h" +#include "FileIndexRecord.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" +#include "clang/Index/IndexSymbol.h" +#include "clang/Index/USRGeneration.h" + +using namespace clang; +using namespace clang::index; + +StringRef ClangIndexRecordWriter::getUSR(const Decl *D) { + assert(D->isCanonicalDecl()); + auto Insert = USRByDecl.insert(std::make_pair(D, StringRef())); + if (Insert.second) { + Insert.first->second = getUSRNonCached(D); + } + return Insert.first->second; +} + +StringRef ClangIndexRecordWriter::getUSRNonCached(const Decl *D) { + SmallString<256> Buf; + bool Ignore = generateUSRForDecl(D, Buf); + if (Ignore) + return StringRef(); + StringRef USR = Buf.str(); + char *Ptr = Allocator.Allocate(USR.size()); + std::copy(USR.begin(), USR.end(), Ptr); + return StringRef(Ptr, USR.size()); +} + +ClangIndexRecordWriter::ClangIndexRecordWriter(ASTContext &Ctx, + RecordingOptions Opts) + : Impl(Opts.DataDirPath), Ctx(Ctx), RecordOpts(std::move(Opts)), + Hasher(Ctx) { + if (Opts.RecordSymbolCodeGenName) + CGNameGen.reset(new CodegenNameGenerator(Ctx)); +} + +ClangIndexRecordWriter::~ClangIndexRecordWriter() {} + +bool ClangIndexRecordWriter::writeRecord(StringRef Filename, + const FileIndexRecord &IdxRecord, + std::string &Error, + std::string *OutRecordFile) { + + auto RecordHash = Hasher.hashRecord(IdxRecord); + + switch (Impl.beginRecord(Filename, RecordHash, Error, OutRecordFile)) { + case IndexRecordWriter::Result::Success: + break; // Continue writing. 
+ case IndexRecordWriter::Result::Failure: + return true; + case IndexRecordWriter::Result::AlreadyExists: + return false; + } + + ASTContext &Ctx = getASTContext(); + SourceManager &SM = Ctx.getSourceManager(); + FileID FID = IdxRecord.getFileID(); + auto getLineCol = [&](unsigned Offset) -> std::pair { + unsigned LineNo = SM.getLineNumber(FID, Offset); + unsigned ColNo = SM.getColumnNumber(FID, Offset); + return std::make_pair(LineNo, ColNo); + }; + + for (auto &Occur : IdxRecord.getDeclOccurrences()) { + unsigned Line, Col; + std::tie(Line, Col) = getLineCol(Occur.Offset); + SmallVector Related; + Related.reserve(Occur.Relations.size()); + for (auto &Rel : Occur.Relations) + Related.push_back(writer::SymbolRelation{Rel.RelatedSymbol, Rel.Roles}); + + Impl.addOccurrence(Occur.Dcl, Occur.Roles, Line, Col, Related); + } + + PrintingPolicy Policy(Ctx.getLangOpts()); + Policy.SuppressTemplateArgsInCXXConstructors = true; + + auto Result = Impl.endRecord( + Error, [&](writer::OpaqueDecl OD, SmallVectorImpl &Scratch) { + const Decl *D = static_cast(OD); + auto Info = getSymbolInfo(D); + + writer::Symbol Sym; + Sym.SymInfo = Info; + + auto *ND = dyn_cast(D); + if (ND) { + llvm::raw_svector_ostream OS(Scratch); + DeclarationName DeclName = ND->getDeclName(); + if (!DeclName.isEmpty()) + DeclName.print(OS, Policy); + } + unsigned NameLen = Scratch.size(); + Sym.Name = StringRef(Scratch.data(), NameLen); + + Sym.USR = getUSR(D); + assert(!Sym.USR.empty() && "Recorded decl without USR!"); + + if (CGNameGen && ND) { + llvm::raw_svector_ostream OS(Scratch); + CGNameGen->writeName(ND, OS); + } + unsigned CGNameLen = Scratch.size() - NameLen; + Sym.CodeGenName = StringRef(Scratch.data() + NameLen, CGNameLen); + return Sym; + }); + + switch (Result) { + case IndexRecordWriter::Result::Success: + case IndexRecordWriter::Result::AlreadyExists: + return false; + case IndexRecordWriter::Result::Failure: + return true; + } +} Index: lib/Index/IndexDataStore.cpp 
=================================================================== --- /dev/null +++ lib/Index/IndexDataStore.cpp @@ -0,0 +1,52 @@ +//===--- IndexDataStore.cpp - Index data store info -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "IndexDataStoreUtils.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::index; +using namespace clang::index::store; +using namespace llvm; + +static void appendSubDir(StringRef subdir, + SmallVectorImpl &StorePathBuf) { + SmallString<10> VersionPath; + raw_svector_ostream(VersionPath) << 'v' << STORE_FORMAT_VERSION; + + sys::path::append(StorePathBuf, VersionPath); + sys::path::append(StorePathBuf, subdir); +} + +void store::appendInteriorUnitPath(StringRef UnitName, + SmallVectorImpl &PathBuf) { + sys::path::append(PathBuf, UnitName); +} + +void store::appendUnitSubDir(SmallVectorImpl &StorePathBuf) { + return appendSubDir("units", StorePathBuf); +} + +void store::appendRecordSubDir(SmallVectorImpl &StorePathBuf) { + return appendSubDir("records", StorePathBuf); +} + +void store::appendInteriorRecordPath(StringRef RecordName, + SmallVectorImpl &PathBuf) { + // To avoid putting a huge number of files into the records directory, create + // subdirectories based on the last 2 characters from the hash.
+ StringRef hash2chars = RecordName.substr(RecordName.size() - 2); + sys::path::append(PathBuf, hash2chars); + sys::path::append(PathBuf, RecordName); +} Index: lib/Index/IndexDataStoreUtils.h =================================================================== --- /dev/null +++ lib/Index/IndexDataStoreUtils.h @@ -0,0 +1,116 @@ +//===--- IndexDataStoreUtils.h - Functions/constants for the data store ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_INDEX_INDEXDATASTOREUTILS_H +#define LLVM_CLANG_LIB_INDEX_INDEXDATASTOREUTILS_H + +#include "clang/Basic/LLVM.h" +#include "llvm/Bitcode/BitCodes.h" + +namespace llvm { +class BitstreamWriter; +} + +namespace clang { +namespace index { +namespace store { + +static const unsigned STORE_FORMAT_VERSION = 5; + +void appendUnitSubDir(SmallVectorImpl &StorePathBuf); +void appendInteriorUnitPath(StringRef UnitName, SmallVectorImpl &PathBuf); +void appendRecordSubDir(SmallVectorImpl &StorePathBuf); +void appendInteriorRecordPath(StringRef RecordName, + SmallVectorImpl &PathBuf); + +enum RecordBitRecord { + REC_VERSION = 0, + REC_DECLINFO = 1, + REC_DECLOFFSETS = 2, + REC_DECLOCCURRENCE = 3, +}; + +enum RecordBitBlock { + REC_VERSION_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID, + REC_DECLS_BLOCK_ID, + REC_DECLOFFSETS_BLOCK_ID, + REC_DECLOCCURRENCES_BLOCK_ID, +}; + +enum UnitBitRecord { + UNIT_VERSION = 0, + UNIT_INFO = 1, + UNIT_DEPENDENCY = 2, + UNIT_INCLUDE = 3, + UNIT_PATH = 4, + UNIT_PATH_BUFFER = 5, + UNIT_MODULE = 6, + UNIT_MODULE_BUFFER = 7, +}; + +enum UnitBitBlock { + UNIT_VERSION_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID, + UNIT_INFO_BLOCK_ID, + UNIT_DEPENDENCIES_BLOCK_ID, + UNIT_INCLUDES_BLOCK_ID, + UNIT_PATHS_BLOCK_ID, + UNIT_MODULES_BLOCK_ID, +}; + +enum UnitDependencyKind { +
UNIT_DEPEND_KIND_FILE = 0, + UNIT_DEPEND_KIND_RECORD = 1, + UNIT_DEPEND_KIND_UNIT = 2, +}; +static const unsigned UnitDependencyKindBitNum = 2; + +enum UnitFilePathPrefixKind { + UNIT_PATH_PREFIX_NONE = 0, + UNIT_PATH_PREFIX_WORKDIR = 1, + UNIT_PATH_PREFIX_SYSROOT = 2, +}; +static const unsigned UnitFilePathPrefixKindBitNum = 2; + +typedef SmallVector RecordData; +typedef SmallVectorImpl RecordDataImpl; + +struct BitPathComponent { + size_t Offset = 0; + size_t Size = 0; + BitPathComponent(size_t Offset, size_t Size) : Offset(Offset), Size(Size) {} + BitPathComponent() = default; +}; + +struct DirBitPath { + UnitFilePathPrefixKind PrefixKind = UNIT_PATH_PREFIX_NONE; + BitPathComponent Dir; + DirBitPath(UnitFilePathPrefixKind Kind, BitPathComponent Dir) + : PrefixKind(Kind), Dir(Dir) {} + DirBitPath() = default; +}; + +struct FileBitPath : DirBitPath { + BitPathComponent Filename; + FileBitPath(UnitFilePathPrefixKind Kind, BitPathComponent Dir, + BitPathComponent Filename) + : DirBitPath(Kind, Dir), Filename(Filename) {} + FileBitPath() = default; +}; + +void emitBlockID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, + RecordDataImpl &Record); + +void emitRecordID(unsigned ID, const char *Name, llvm::BitstreamWriter &Stream, + RecordDataImpl &Record); + +} // end namespace store +} // end namespace index +} // end namespace clang + +#endif // LLVM_CLANG_LIB_INDEX_INDEXDATASTOREUTILS_H Index: lib/Index/IndexDataStoreUtils.cpp =================================================================== --- /dev/null +++ lib/Index/IndexDataStoreUtils.cpp @@ -0,0 +1,238 @@ +//===--- IndexDataStoreUtils.cpp - Functions/constants for the data store -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "IndexDataStoreUtils.h" +#include "clang/Index/IndexDataStoreSymbolUtils.h" +#include "llvm/Bitcode/BitstreamWriter.h" + +using namespace clang; +using namespace clang::index; +using namespace clang::index::store; +using namespace llvm; + +void store::emitBlockID(unsigned ID, const char *Name, BitstreamWriter &Stream, + RecordDataImpl &Record) { + Record.clear(); + Record.push_back(ID); + Stream.EmitRecord(bitc::BLOCKINFO_CODE_SETBID, Record); + + // Emit the block name if present. + if (!Name || Name[0] == 0) + return; + Record.clear(); + while (*Name) + Record.push_back(*Name++); + Stream.EmitRecord(bitc::BLOCKINFO_CODE_BLOCKNAME, Record); +} + +void store::emitRecordID(unsigned ID, const char *Name, BitstreamWriter &Stream, + RecordDataImpl &Record) { + Record.clear(); + Record.push_back(ID); + while (*Name) + Record.push_back(*Name++); + Stream.EmitRecord(bitc::BLOCKINFO_CODE_SETRECORDNAME, Record); +} + +/// Map a SymbolKind to an indexstore_symbol_kind_t.
+indexstore_symbol_kind_t index::getIndexStoreKind(SymbolKind K) { + switch (K) { + case SymbolKind::Unknown: + return INDEXSTORE_SYMBOL_KIND_UNKNOWN; + case SymbolKind::Module: + return INDEXSTORE_SYMBOL_KIND_MODULE; + case SymbolKind::Namespace: + return INDEXSTORE_SYMBOL_KIND_NAMESPACE; + case SymbolKind::NamespaceAlias: + return INDEXSTORE_SYMBOL_KIND_NAMESPACEALIAS; + case SymbolKind::Macro: + return INDEXSTORE_SYMBOL_KIND_MACRO; + case SymbolKind::Enum: + return INDEXSTORE_SYMBOL_KIND_ENUM; + case SymbolKind::Struct: + return INDEXSTORE_SYMBOL_KIND_STRUCT; + case SymbolKind::Class: + return INDEXSTORE_SYMBOL_KIND_CLASS; + case SymbolKind::Protocol: + return INDEXSTORE_SYMBOL_KIND_PROTOCOL; + case SymbolKind::Extension: + return INDEXSTORE_SYMBOL_KIND_EXTENSION; + case SymbolKind::Union: + return INDEXSTORE_SYMBOL_KIND_UNION; + case SymbolKind::TypeAlias: + return INDEXSTORE_SYMBOL_KIND_TYPEALIAS; + case SymbolKind::Function: + return INDEXSTORE_SYMBOL_KIND_FUNCTION; + case SymbolKind::Variable: + return INDEXSTORE_SYMBOL_KIND_VARIABLE; + case SymbolKind::Field: + return INDEXSTORE_SYMBOL_KIND_FIELD; + case SymbolKind::EnumConstant: + return INDEXSTORE_SYMBOL_KIND_ENUMCONSTANT; + case SymbolKind::InstanceMethod: + return INDEXSTORE_SYMBOL_KIND_INSTANCEMETHOD; + case SymbolKind::ClassMethod: + return INDEXSTORE_SYMBOL_KIND_CLASSMETHOD; + case SymbolKind::StaticMethod: + return INDEXSTORE_SYMBOL_KIND_STATICMETHOD; + case SymbolKind::InstanceProperty: + return INDEXSTORE_SYMBOL_KIND_INSTANCEPROPERTY; + case SymbolKind::ClassProperty: + return INDEXSTORE_SYMBOL_KIND_CLASSPROPERTY; + case SymbolKind::StaticProperty: + return INDEXSTORE_SYMBOL_KIND_STATICPROPERTY; + case SymbolKind::Constructor: + return INDEXSTORE_SYMBOL_KIND_CONSTRUCTOR; + case SymbolKind::Destructor: + return INDEXSTORE_SYMBOL_KIND_DESTRUCTOR; + case SymbolKind::ConversionFunction: + return INDEXSTORE_SYMBOL_KIND_CONVERSIONFUNCTION; + case SymbolKind::Parameter: + return 
INDEXSTORE_SYMBOL_KIND_PARAMETER; + case SymbolKind::Using: + return INDEXSTORE_SYMBOL_KIND_USING; + } + llvm_unreachable("unexpected symbol kind"); +} + +indexstore_symbol_subkind_t index::getIndexStoreSubKind(SymbolSubKind K) { + switch (K) { + case SymbolSubKind::None: + return INDEXSTORE_SYMBOL_SUBKIND_NONE; + case SymbolSubKind::CXXCopyConstructor: + return INDEXSTORE_SYMBOL_SUBKIND_CXXCOPYCONSTRUCTOR; + case SymbolSubKind::CXXMoveConstructor: + return INDEXSTORE_SYMBOL_SUBKIND_CXXMOVECONSTRUCTOR; + case SymbolSubKind::AccessorGetter: + return INDEXSTORE_SYMBOL_SUBKIND_ACCESSORGETTER; + case SymbolSubKind::AccessorSetter: + return INDEXSTORE_SYMBOL_SUBKIND_ACCESSORSETTER; + case SymbolSubKind::UsingTypename: + return INDEXSTORE_SYMBOL_SUBKIND_USINGTYPENAME; + case SymbolSubKind::UsingValue: + return INDEXSTORE_SYMBOL_SUBKIND_USINGVALUE; + } + llvm_unreachable("unexpected symbol subkind"); +} + +/// Map a SymbolLanguage to a indexstore_symbol_language_t. +indexstore_symbol_language_t index::getIndexStoreLang(SymbolLanguage L) { + switch (L) { + case SymbolLanguage::C: + return INDEXSTORE_SYMBOL_LANG_C; + case SymbolLanguage::ObjC: + return INDEXSTORE_SYMBOL_LANG_OBJC; + case SymbolLanguage::CXX: + return INDEXSTORE_SYMBOL_LANG_CXX; + case SymbolLanguage::Swift: + return INDEXSTORE_SYMBOL_LANG_SWIFT; + } + llvm_unreachable("unexpected symbol language"); +} + +/// Map a SymbolPropertySet to its indexstore representation. 
+uint64_t index::getIndexStoreProperties(SymbolPropertySet Props) {
+  // Translates a single property flag into its indexstore bit. A flag that is
+  // not listed contributes no bits.
+  auto toStoreBit = [](SymbolProperty Property) -> uint64_t {
+    switch (Property) {
+    case SymbolProperty::Generic:
+      return INDEXSTORE_SYMBOL_PROPERTY_GENERIC;
+    case SymbolProperty::TemplatePartialSpecialization:
+      return INDEXSTORE_SYMBOL_PROPERTY_TEMPLATE_PARTIAL_SPECIALIZATION;
+    case SymbolProperty::TemplateSpecialization:
+      return INDEXSTORE_SYMBOL_PROPERTY_TEMPLATE_SPECIALIZATION;
+    case SymbolProperty::UnitTest:
+      return INDEXSTORE_SYMBOL_PROPERTY_UNITTEST;
+    case SymbolProperty::IBAnnotated:
+      return INDEXSTORE_SYMBOL_PROPERTY_IBANNOTATED;
+    case SymbolProperty::IBOutletCollection:
+      return INDEXSTORE_SYMBOL_PROPERTY_IBOUTLETCOLLECTION;
+    case SymbolProperty::GKInspectable:
+      return INDEXSTORE_SYMBOL_PROPERTY_GKINSPECTABLE;
+    case SymbolProperty::Local:
+      return INDEXSTORE_SYMBOL_PROPERTY_LOCAL;
+    }
+    return 0;
+  };
+
+  // OR together the bits of every property present in the set.
+  uint64_t StoreProps = 0;
+  applyForEachSymbolProperty(Props, [&](SymbolProperty Property) {
+    StoreProps |= toStoreBit(Property);
+  });
+  return StoreProps;
+}
+
+/// Map a SymbolRoleSet to its indexstore representation.
+uint64_t index::getIndexStoreRoles(SymbolRoleSet Roles) {
+  // Translates a single role flag into its indexstore bit. A flag that is not
+  // listed contributes no bits.
+  auto toStoreBit = [](SymbolRole Role) -> uint64_t {
+    switch (Role) {
+    case SymbolRole::Declaration:
+      return INDEXSTORE_SYMBOL_ROLE_DECLARATION;
+    case SymbolRole::Definition:
+      return INDEXSTORE_SYMBOL_ROLE_DEFINITION;
+    case SymbolRole::Reference:
+      return INDEXSTORE_SYMBOL_ROLE_REFERENCE;
+    case SymbolRole::Read:
+      return INDEXSTORE_SYMBOL_ROLE_READ;
+    case SymbolRole::Write:
+      return INDEXSTORE_SYMBOL_ROLE_WRITE;
+    case SymbolRole::Call:
+      return INDEXSTORE_SYMBOL_ROLE_CALL;
+    case SymbolRole::Dynamic:
+      return INDEXSTORE_SYMBOL_ROLE_DYNAMIC;
+    case SymbolRole::AddressOf:
+      return INDEXSTORE_SYMBOL_ROLE_ADDRESSOF;
+    case SymbolRole::Implicit:
+      return INDEXSTORE_SYMBOL_ROLE_IMPLICIT;
+    case SymbolRole::RelationChildOf:
+      return INDEXSTORE_SYMBOL_ROLE_REL_CHILDOF;
+    case SymbolRole::RelationBaseOf:
+      return INDEXSTORE_SYMBOL_ROLE_REL_BASEOF;
+    case SymbolRole::RelationOverrideOf:
+      return INDEXSTORE_SYMBOL_ROLE_REL_OVERRIDEOF;
+    case SymbolRole::RelationReceivedBy:
+      return INDEXSTORE_SYMBOL_ROLE_REL_RECEIVEDBY;
+    case SymbolRole::RelationCalledBy:
+      return INDEXSTORE_SYMBOL_ROLE_REL_CALLEDBY;
+    case SymbolRole::RelationExtendedBy:
+      return INDEXSTORE_SYMBOL_ROLE_REL_EXTENDEDBY;
+    case SymbolRole::RelationAccessorOf:
+      return INDEXSTORE_SYMBOL_ROLE_REL_ACCESSOROF;
+    case SymbolRole::RelationContainedBy:
+      return INDEXSTORE_SYMBOL_ROLE_REL_CONTAINEDBY;
+    case SymbolRole::RelationIBTypeOf:
+      return INDEXSTORE_SYMBOL_ROLE_REL_IBTYPEOF;
+    case SymbolRole::RelationSpecializationOf:
+      return INDEXSTORE_SYMBOL_ROLE_REL_SPECIALIZATIONOF;
+    }
+    return 0;
+  };
+
+  // OR together the bits of every role present in the set.
+  uint64_t StoreRoles = 0;
+  applyForEachSymbolRole(
+      Roles, [&](SymbolRole Role) { StoreRoles |= toStoreBit(Role); });
+  return StoreRoles;
+}
Index:
lib/Index/IndexRecordHasher.h
===================================================================
--- /dev/null
+++ lib/Index/IndexRecordHasher.h
@@ -0,0 +1,57 @@
+//===--- IndexRecordHasher.h - Index record hashing -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_INDEX_INDEXRECORDHASHER_H
+#define LLVM_CLANG_LIB_INDEX_INDEXRECORDHASHER_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+
+namespace clang {
+class ASTContext;
+class Decl;
+class DeclarationName;
+class NestedNameSpecifier;
+class QualType;
+class Type;
+// NOTE(review): the template parameter/argument lists on the next two lines
+// (and on HashByPtr and tryCache below) are missing in this copy of the patch.
+template class CanQual;
+typedef CanQual CanQualType;
+
+namespace index {
+class FileIndexRecord;
+
+/// Computes llvm::hash_code values for a file's index record and for the AST
+/// entities (declarations, types, names, nested-name-specifiers) it refers
+/// to.
+class IndexRecordHasher {
+  ASTContext &Ctx;
+  /// Hashes already computed, keyed by the pointer handed to tryCache().
+  llvm::DenseMap HashByPtr;
+
+public:
+  explicit IndexRecordHasher(ASTContext &Ctx) : Ctx(Ctx) {}
+  ASTContext &getASTContext() { return Ctx; }
+
+  /// Hash every declaration occurrence in \p Record together with the symbols
+  /// related to it.
+  llvm::hash_code hashRecord(const FileIndexRecord &Record);
+  /// Hash a declaration; the implementation asserts that \p D is canonical.
+  llvm::hash_code hash(const Decl *D);
+  /// Hash a type by way of its canonical type.
+  llvm::hash_code hash(QualType Ty);
+  llvm::hash_code hash(CanQualType Ty);
+  /// Hash a name; the implementation asserts that \p Name is not empty.
+  llvm::hash_code hash(DeclarationName Name);
+  /// Hash a nested-name-specifier; the implementation asserts that \p NNS is
+  /// not null.
+  llvm::hash_code hash(const NestedNameSpecifier *NNS);
+
+private:
+  /// Return the hash stored in HashByPtr for \p Ptr, or compute it with
+  /// hashImpl(\p Obj) and store it.
+  template llvm::hash_code tryCache(const void *Ptr, T Obj);
+
+  // Uncached implementations behind the hash() overloads above.
+  llvm::hash_code hashImpl(const Decl *D);
+  llvm::hash_code hashImpl(CanQualType Ty);
+  llvm::hash_code hashImpl(DeclarationName Name);
+  llvm::hash_code hashImpl(const NestedNameSpecifier *NNS);
+};
+
+} // end namespace index
+} // end namespace clang
+
+#endif
Index: lib/Index/IndexRecordHasher.cpp
===================================================================
--- /dev/null
+++ lib/Index/IndexRecordHasher.cpp
@@ -0,0 +1,480 @@
+//===--- IndexRecordHasher.cpp - Index record hashing ---------------------===//
+//
+// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "IndexRecordHasher.h" +#include "FileIndexRecord.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/DeclVisitor.h" +#include "llvm/Support/Path.h" + +#define INITIAL_HASH 5381 +#define COMBINE_HASH(...) (Hash = hash_combine(Hash, __VA_ARGS__)) + +using namespace clang; +using namespace clang::index; +using namespace llvm; + +static hash_code computeHash(const TemplateArgument &Arg, + IndexRecordHasher &Hasher); + +namespace { +class DeclHashVisitor : public ConstDeclVisitor { + IndexRecordHasher &Hasher; + +public: + DeclHashVisitor(IndexRecordHasher &Hasher) : Hasher(Hasher) {} + + hash_code VisitDecl(const Decl *D) { + return VisitDeclContext(D->getDeclContext()); + } + + hash_code VisitNamedDecl(const NamedDecl *D) { + hash_code Hash = VisitDecl(D); + if (auto *attr = D->getExternalSourceSymbolAttr()) { + COMBINE_HASH(hash_value(attr->getDefinedIn())); + } + return COMBINE_HASH(Hasher.hash(D->getDeclName())); + } + + hash_code VisitTagDecl(const TagDecl *D) { + if (D->getDeclName().isEmpty()) { + if (const TypedefNameDecl *TD = D->getTypedefNameForAnonDecl()) + return Visit(TD); + + hash_code Hash = VisitDeclContext(D->getDeclContext()); + if (D->isEmbeddedInDeclarator() && !D->isFreeStanding()) { + COMBINE_HASH(hashLoc(D->getLocation(), /*IncludeOffset=*/true)); + } else + COMBINE_HASH('a'); + return Hash; + } + + hash_code Hash = VisitTypeDecl(D); + return COMBINE_HASH('T'); + } + + hash_code VisitClassTemplateSpecializationDecl( + const ClassTemplateSpecializationDecl *D) { + hash_code Hash = VisitCXXRecordDecl(D); + const TemplateArgumentList &Args = D->getTemplateArgs(); + COMBINE_HASH('>'); + for (unsigned I = 0, N = Args.size(); I != N; ++I) { + 
COMBINE_HASH(computeHash(Args.get(I), Hasher)); + } + return Hash; + } + + hash_code VisitObjCContainerDecl(const ObjCContainerDecl *D) { + hash_code Hash = VisitNamedDecl(D); + return COMBINE_HASH('I'); + } + + hash_code VisitObjCImplDecl(const ObjCImplDecl *D) { + if (auto *ID = D->getClassInterface()) + return VisitObjCInterfaceDecl(ID); + else + return 0; + } + + hash_code VisitObjCCategoryDecl(const ObjCCategoryDecl *D) { + // FIXME: Differentiate between category and the interface ? + if (auto *ID = D->getClassInterface()) + return VisitObjCInterfaceDecl(ID); + else + return 0; + } + + hash_code VisitFunctionDecl(const FunctionDecl *D) { + hash_code Hash = VisitNamedDecl(D); + ASTContext &Ctx = Hasher.getASTContext(); + if ((!Ctx.getLangOpts().CPlusPlus && !D->hasAttr()) || + D->isExternC()) + return Hash; + + for (auto param : D->parameters()) { + COMBINE_HASH(Hasher.hash(param->getType())); + } + return Hash; + } + + hash_code + VisitUnresolvedUsingTypenameDecl(const UnresolvedUsingTypenameDecl *D) { + hash_code Hash = VisitNamedDecl(D); + COMBINE_HASH(Hasher.hash(D->getQualifier())); + return Hash; + } + + hash_code VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl *D) { + hash_code Hash = VisitNamedDecl(D); + COMBINE_HASH(Hasher.hash(D->getQualifier())); + return Hash; + } + + hash_code VisitDeclContext(const DeclContext *DC) { + // FIXME: Add location if this is anonymous namespace ? 
+ DC = DC->getRedeclContext(); + const Decl *D = cast(DC)->getCanonicalDecl(); + if (auto *ND = dyn_cast(D)) + return Hasher.hash(ND); + else + return 0; + } + + hash_code hashLoc(SourceLocation Loc, bool IncludeOffset) { + if (Loc.isInvalid()) { + return 0; + } + hash_code Hash = INITIAL_HASH; + const SourceManager &SM = Hasher.getASTContext().getSourceManager(); + Loc = SM.getFileLoc(Loc); + const std::pair Decomposed = SM.getDecomposedLoc(Loc); + const FileEntry *FE = SM.getFileEntryForID(Decomposed.first); + if (FE) { + COMBINE_HASH(llvm::sys::path::filename(FE->getName())); + } else { + // This case really isn't interesting. + return 0; + } + if (IncludeOffset) { + // Use the offset into the FileID to represent the location. Using + // a line/column can cause us to look back at the original source file, + // which is expensive. + COMBINE_HASH(Decomposed.second); + } + return Hash; + } +}; +} // namespace + +hash_code IndexRecordHasher::hashRecord(const FileIndexRecord &Record) { + hash_code Hash = INITIAL_HASH; + for (auto &Info : Record.getDeclOccurrences()) { + COMBINE_HASH(Info.Roles, Info.Offset, hash(Info.Dcl)); + for (auto &Rel : Info.Relations) { + COMBINE_HASH(hash(Rel.RelatedSymbol)); + } + } + return Hash; +} + +hash_code IndexRecordHasher::hash(const Decl *D) { + assert(D->isCanonicalDecl()); + + if (isa(D) || isa(D)) { + return tryCache(D, D); + } else if (auto *NS = dyn_cast(D)) { + if (NS->isAnonymousNamespace()) + return hash_value(StringRef("@aN")); + return tryCache(D, D); + } else { + // There's a balance between caching results and not growing the cache too + // much. Measurements showed that avoiding caching all decls is beneficial + // particularly when including all of Cocoa. 
+ return hashImpl(D); + } +} + +hash_code IndexRecordHasher::hash(QualType NonCanTy) { + CanQualType CanTy = Ctx.getCanonicalType(NonCanTy); + return hash(CanTy); +} + +hash_code IndexRecordHasher::hash(CanQualType CT) { + // Do some hashing without going to the cache, for example we can avoid + // storing the hash for both the type and its const-qualified version. + hash_code Hash = INITIAL_HASH; + + auto asCanon = [](QualType Ty) -> CanQualType { + return CanQualType::CreateUnsafe(Ty); + }; + + while (true) { + Qualifiers Q = CT.getQualifiers(); + CT = CT.getUnqualifiedType(); + const Type *T = CT.getTypePtr(); + unsigned qVal = 0; + if (Q.hasConst()) + qVal |= Qualifiers::Const; + if (Q.hasVolatile()) + qVal |= Qualifiers::Volatile; + if (Q.hasRestrict()) + qVal |= Qualifiers::Restrict; + if (qVal) + COMBINE_HASH(qVal); + + // FIXME: Hash in ObjC GC qualifiers + + if (const BuiltinType *BT = dyn_cast(T)) { + return COMBINE_HASH(BT->getKind()); + } + if (const PointerType *PT = dyn_cast(T)) { + COMBINE_HASH('*'); + CT = asCanon(PT->getPointeeType()); + continue; + } + if (const ReferenceType *RT = dyn_cast(T)) { + COMBINE_HASH('&'); + CT = asCanon(RT->getPointeeType()); + continue; + } + if (const BlockPointerType *BT = dyn_cast(T)) { + COMBINE_HASH('B'); + CT = asCanon(BT->getPointeeType()); + continue; + } + if (const ObjCObjectPointerType *OPT = dyn_cast(T)) { + COMBINE_HASH('*'); + CT = asCanon(OPT->getPointeeType()); + continue; + } + if (const TagType *TT = dyn_cast(T)) { + return COMBINE_HASH('$', hash(TT->getDecl()->getCanonicalDecl())); + } + if (const ObjCInterfaceType *OIT = dyn_cast(T)) { + return COMBINE_HASH('$', hash(OIT->getDecl()->getCanonicalDecl())); + } + if (const ObjCObjectType *OIT = dyn_cast(T)) { + for (auto *Prot : OIT->getProtocols()) + COMBINE_HASH(hash(Prot)); + CT = asCanon(OIT->getBaseType()); + continue; + } + if (const TemplateTypeParmType *TTP = dyn_cast(T)) { + return COMBINE_HASH('t', TTP->getDepth(), TTP->getIndex()); + } + if 
(const InjectedClassNameType *InjT = + dyn_cast(T)) { + CT = asCanon(InjT->getInjectedSpecializationType().getCanonicalType()); + continue; + } + if (const PackExpansionType *Expansion = dyn_cast(T)) { + return COMBINE_HASH('P', hash(asCanon(Expansion->getPattern()))); + } + if (const RValueReferenceType *RT = dyn_cast(T)) { + return COMBINE_HASH('%', hash(asCanon(RT->getPointeeType()))); + } + if (const ComplexType *CT = dyn_cast(T)) { + return COMBINE_HASH('<', hash(asCanon(CT->getElementType()))); + } + + break; + } + + return COMBINE_HASH(tryCache(CT.getAsOpaquePtr(), CT)); +} + +hash_code IndexRecordHasher::hash(DeclarationName Name) { + assert(!Name.isEmpty()); + // Measurements for using cache or not here, showed significant slowdown when + // using the cache for all DeclarationNames when parsing Cocoa, and minor + // improvement or no difference for a couple of C++ single translation unit + // files. So we avoid caching DeclarationNames. + return hashImpl(Name); +} + +hash_code IndexRecordHasher::hash(const NestedNameSpecifier *NNS) { + assert(NNS); + // Measurements for the C++ single translation unit files did not show much + // difference here; choosing to cache them currently. + return tryCache(NNS, NNS); +} + +template +hash_code IndexRecordHasher::tryCache(const void *Ptr, T Obj) { + auto It = HashByPtr.find(Ptr); + if (It != HashByPtr.end()) + return It->second; + + hash_code Hash = hashImpl(Obj); + // hashImpl() may call into tryCache recursively and mutate + // HashByPtr, so we use find() earlier and insert the hash with another + // lookup here instead of calling insert() earlier and utilizing the iterator + // that insert() returns. 
+ HashByPtr[Ptr] = Hash; + return Hash; +} + +hash_code IndexRecordHasher::hashImpl(const Decl *D) { + return DeclHashVisitor(*this).Visit(D); +} + +static hash_code computeHash(const IdentifierInfo *II) { + return hash_value(II->getName()); +} + +static hash_code computeHash(Selector Sel) { + unsigned N = Sel.getNumArgs(); + if (N == 0) + ++N; + hash_code Hash = INITIAL_HASH; + for (unsigned I = 0; I != N; ++I) + if (IdentifierInfo *II = Sel.getIdentifierInfoForSlot(I)) + COMBINE_HASH(computeHash(II)); + return Hash; +} + +static hash_code computeHash(TemplateName Name, IndexRecordHasher &Hasher) { + hash_code Hash = INITIAL_HASH; + if (TemplateDecl *Template = Name.getAsTemplateDecl()) { + if (TemplateTemplateParmDecl *TTP = + dyn_cast(Template)) { + return COMBINE_HASH('t', TTP->getDepth(), TTP->getIndex()); + } + + return COMBINE_HASH(Hasher.hash(Template->getCanonicalDecl())); + } + + // FIXME: Hash dependent template names. + return Hash; +} + +static hash_code computeHash(const TemplateArgument &Arg, + IndexRecordHasher &Hasher) { + hash_code Hash = INITIAL_HASH; + + switch (Arg.getKind()) { + case TemplateArgument::Null: + break; + + case TemplateArgument::Declaration: + COMBINE_HASH(Hasher.hash(Arg.getAsDecl())); + break; + + case TemplateArgument::NullPtr: + break; + + case TemplateArgument::TemplateExpansion: + COMBINE_HASH('P'); // pack expansion of... + // Fall through + case TemplateArgument::Template: + COMBINE_HASH(computeHash(Arg.getAsTemplateOrTemplatePattern(), Hasher)); + break; + + case TemplateArgument::Expression: + // FIXME: Hash expressions. 
+ break; + + case TemplateArgument::Pack: + COMBINE_HASH('p'); + for (const auto &P : Arg.pack_elements()) + COMBINE_HASH(computeHash(P, Hasher)); + break; + + case TemplateArgument::Type: + COMBINE_HASH(Hasher.hash(Arg.getAsType())); + break; + + case TemplateArgument::Integral: + COMBINE_HASH('V', Hasher.hash(Arg.getIntegralType()), Arg.getAsIntegral()); + break; + } + + return Hash; +} + +hash_code IndexRecordHasher::hashImpl(CanQualType CQT) { + hash_code Hash = INITIAL_HASH; + + auto asCanon = [](QualType Ty) -> CanQualType { + return CanQualType::CreateUnsafe(Ty); + }; + + const Type *T = CQT.getTypePtr(); + + if (const FunctionProtoType *FT = dyn_cast(T)) { + COMBINE_HASH('F', hash(asCanon(FT->getReturnType()))); + for (const auto &I : FT->param_types()) + COMBINE_HASH(hash(asCanon(I))); + return COMBINE_HASH(FT->isVariadic()); + } + if (const TemplateSpecializationType *Spec = + dyn_cast(T)) { + COMBINE_HASH('>', computeHash(Spec->getTemplateName(), *this)); + for (unsigned I = 0, N = Spec->getNumArgs(); I != N; ++I) + COMBINE_HASH(computeHash(Spec->getArg(I), *this)); + return Hash; + } + if (const DependentNameType *DNT = dyn_cast(T)) { + COMBINE_HASH('^'); + if (const NestedNameSpecifier *NNS = DNT->getQualifier()) + COMBINE_HASH(hash(NNS)); + return COMBINE_HASH(computeHash(DNT->getIdentifier())); + } + + // FIXME: Unhandled types? 
+ return Hash; +} + +hash_code IndexRecordHasher::hashImpl(DeclarationName Name) { + hash_code Hash = INITIAL_HASH; + COMBINE_HASH(Name.getNameKind()); + + switch (Name.getNameKind()) { + case DeclarationName::Identifier: + COMBINE_HASH(computeHash(Name.getAsIdentifierInfo())); + break; + case DeclarationName::ObjCZeroArgSelector: + case DeclarationName::ObjCOneArgSelector: + case DeclarationName::ObjCMultiArgSelector: + COMBINE_HASH(computeHash(Name.getObjCSelector())); + break; + case DeclarationName::CXXConstructorName: + case DeclarationName::CXXDestructorName: + case DeclarationName::CXXConversionFunctionName: + break; + case DeclarationName::CXXOperatorName: + COMBINE_HASH(Name.getCXXOverloadedOperator()); + break; + case DeclarationName::CXXLiteralOperatorName: + COMBINE_HASH(computeHash(Name.getCXXLiteralIdentifier())); + case DeclarationName::CXXUsingDirective: + case DeclarationName::CXXDeductionGuideName: + break; + } + + return Hash; +} + +hash_code IndexRecordHasher::hashImpl(const NestedNameSpecifier *NNS) { + hash_code Hash = INITIAL_HASH; + if (auto *Pre = NNS->getPrefix()) + COMBINE_HASH(hash(Pre)); + + COMBINE_HASH(NNS->getKind()); + + switch (NNS->getKind()) { + case NestedNameSpecifier::Identifier: + COMBINE_HASH(computeHash(NNS->getAsIdentifier())); + break; + + case NestedNameSpecifier::Namespace: + COMBINE_HASH(hash(NNS->getAsNamespace()->getCanonicalDecl())); + break; + + case NestedNameSpecifier::NamespaceAlias: + COMBINE_HASH(hash(NNS->getAsNamespaceAlias()->getCanonicalDecl())); + break; + + case NestedNameSpecifier::Global: + break; + + case NestedNameSpecifier::Super: + break; + + case NestedNameSpecifier::TypeSpecWithTemplate: + // Fall through to hash the type. 
+ + case NestedNameSpecifier::TypeSpec: + COMBINE_HASH(hash(QualType(NNS->getAsType(), 0))); + break; + } + + return Hash; +} Index: lib/Index/IndexRecordWriter.cpp =================================================================== --- /dev/null +++ lib/Index/IndexRecordWriter.cpp @@ -0,0 +1,370 @@ +//===--- IndexRecordWriter.cpp - Index record serialization ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/Index/IndexRecordWriter.h" +#include "IndexDataStoreUtils.h" +#include "indexstore/indexstore.h" +#include "clang/Index/IndexDataStoreSymbolUtils.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Bitcode/BitstreamWriter.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::index; +using namespace clang::index::store; +using namespace llvm; + +using writer::OpaqueDecl; + +namespace { +struct DeclInfo { + OpaqueDecl D; + SymbolRoleSet Roles; + SymbolRoleSet RelatedRoles; +}; + +struct OccurrenceInfo { + unsigned DeclID; + OpaqueDecl D; + SymbolRoleSet Roles; + unsigned Line; + unsigned Column; + SmallVector, 4> Related; +}; +} // end anonymous namespace + +static void writeBlockInfo(BitstreamWriter &Stream) { + RecordData Record; + + Stream.EnterBlockInfoBlock(); +#define BLOCK(X) emitBlockID(X##_ID, #X, Stream, Record) +#define RECORD(X) emitRecordID(X, #X, Stream, Record) + + BLOCK(REC_VERSION_BLOCK); + RECORD(REC_VERSION); + + BLOCK(REC_DECLS_BLOCK); + RECORD(REC_DECLINFO); + + BLOCK(REC_DECLOFFSETS_BLOCK); + RECORD(REC_DECLOFFSETS); + + BLOCK(REC_DECLOCCURRENCES_BLOCK); + RECORD(REC_DECLOCCURRENCE); + +#undef RECORD +#undef 
BLOCK + Stream.ExitBlock(); +} + +static void writeVersionInfo(BitstreamWriter &Stream) { + using namespace llvm::sys; + + Stream.EnterSubblock(REC_VERSION_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(REC_VERSION)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Store format version + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData Record; + Record.push_back(REC_VERSION); + Record.push_back(STORE_FORMAT_VERSION); + Stream.EmitRecordWithAbbrev(AbbrevCode, Record); + + Stream.ExitBlock(); +} + +template +static StringRef data(const std::vector &v) { + if (v.empty()) + return StringRef(); + return StringRef(reinterpret_cast(&v[0]), sizeof(T) * v.size()); +} + +template static StringRef data(const SmallVectorImpl &v) { + return StringRef(reinterpret_cast(v.data()), + sizeof(T) * v.size()); +} + +static void writeDecls(BitstreamWriter &Stream, ArrayRef Decls, + ArrayRef Occurrences, + writer::SymbolWriterCallback GetSymbolForDecl) { + SmallVector DeclOffsets; + DeclOffsets.reserve(Decls.size()); + + //===--------------------------------------------------------------------===// + // DECLS_BLOCK_ID + //===--------------------------------------------------------------------===// + + Stream.EnterSubblock(REC_DECLS_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(REC_DECLINFO)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); // Kind + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); // SubKind + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); // Language + // Properties + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, SymbolPropertyBitNum)); + // Roles + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, SymbolRoleBitNum)); + // Related Roles + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, SymbolRoleBitNum)); + // Length of name in block + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + // Length of USR in block + 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + // Name + USR + CodeGen symbol name + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + +#ifndef NDEBUG + StringSet<> USRSet; +#endif + + RecordData Record; + llvm::SmallString<256> Blob; + llvm::SmallString<256> Scratch; + for (auto &Info : Decls) { + DeclOffsets.push_back(Stream.GetCurrentBitNo()); + Blob.clear(); + Scratch.clear(); + + writer::Symbol SymInfo = GetSymbolForDecl(Info.D, Scratch); + assert(SymInfo.SymInfo.Kind != SymbolKind::Unknown); + assert(!SymInfo.USR.empty() && "Recorded decl without USR!"); + + Blob += SymInfo.Name; + Blob += SymInfo.USR; + Blob += SymInfo.CodeGenName; + +#ifndef NDEBUG + bool IsNew = USRSet.insert(SymInfo.USR).second; + if (!IsNew) { + llvm::errs() << "Index: Duplicate USR! " << SymInfo.USR << "\n"; + // FIXME: print more information so it's easier to find the declaration. + } +#endif + + Record.clear(); + Record.push_back(REC_DECLINFO); + Record.push_back(getIndexStoreKind(SymInfo.SymInfo.Kind)); + Record.push_back(getIndexStoreSubKind(SymInfo.SymInfo.SubKind)); + Record.push_back(getIndexStoreLang(SymInfo.SymInfo.Lang)); + Record.push_back(getIndexStoreProperties(SymInfo.SymInfo.Properties)); + Record.push_back(getIndexStoreRoles(Info.Roles)); + Record.push_back(getIndexStoreRoles(Info.RelatedRoles)); + Record.push_back(SymInfo.Name.size()); + Record.push_back(SymInfo.USR.size()); + Stream.EmitRecordWithBlob(AbbrevCode, Record, Blob); + } + + Stream.ExitBlock(); + + //===--------------------------------------------------------------------===// + // DECLOFFSETS_BLOCK_ID + //===--------------------------------------------------------------------===// + + Stream.EnterSubblock(REC_DECLOFFSETS_BLOCK_ID, 3); + + Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(REC_DECLOFFSETS)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // Number of Decls + 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Offsets array + AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + Record.clear(); + Record.push_back(REC_DECLOFFSETS); + Record.push_back(DeclOffsets.size()); + Stream.EmitRecordWithBlob(AbbrevCode, Record, data(DeclOffsets)); + + Stream.ExitBlock(); + + //===--------------------------------------------------------------------===// + // DECLOCCURRENCES_BLOCK_ID + //===--------------------------------------------------------------------===// + + Stream.EnterSubblock(REC_DECLOCCURRENCES_BLOCK_ID, 3); + + Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(REC_DECLOCCURRENCE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Decl ID + // Roles + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, SymbolRoleBitNum)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); // Line + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Column + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Num related + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // Related Roles/IDs + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // Roles or ID + AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + for (auto &Occur : Occurrences) { + Record.clear(); + Record.push_back(REC_DECLOCCURRENCE); + Record.push_back(Occur.DeclID); + Record.push_back(getIndexStoreRoles(Occur.Roles)); + Record.push_back(Occur.Line); + Record.push_back(Occur.Column); + Record.push_back(Occur.Related.size()); + for (auto &Rel : Occur.Related) { + Record.push_back(getIndexStoreRoles(Rel.first.Roles)); + Record.push_back(Rel.second); + } + Stream.EmitRecordWithAbbrev(AbbrevCode, Record); + } + Stream.ExitBlock(); +} + +struct IndexRecordWriter::RecordState { + std::string RecordPath; + SmallString<512> Buffer; + BitstreamWriter Stream; + + DenseMap IndexForDecl; + std::vector Decls; + std::vector Occurrences; + + RecordState(std::string &&RecordPath) + : RecordPath(std::move(RecordPath)), 
Stream(Buffer) {} +}; + +IndexRecordWriter::IndexRecordWriter(StringRef IndexPath) + : RecordsPath(IndexPath) { + store::appendRecordSubDir(RecordsPath); +} +IndexRecordWriter::~IndexRecordWriter() = default; + +IndexRecordWriter::Result +IndexRecordWriter::beginRecord(StringRef Filename, hash_code RecordHash, + std::string &Error, std::string *OutRecordFile) { + using namespace llvm::sys; + assert(!Record && "called beginRecord before calling endRecord on previous"); + + std::string RecordName; + { + llvm::raw_string_ostream RN(RecordName); + RN << path::filename(Filename); + RN << "-" << APInt(64, RecordHash).toString(36, /*Signed=*/false); + } + SmallString<256> RecordPath = RecordsPath.str(); + appendInteriorRecordPath(RecordName, RecordPath); + + if (OutRecordFile) + *OutRecordFile = RecordName; + + if (std::error_code EC = + fs::access(RecordPath.c_str(), fs::AccessMode::Exist)) { + if (EC != errc::no_such_file_or_directory) { + llvm::raw_string_ostream Err(Error); + Err << "could not access record '" << RecordPath << "': " << EC.message(); + return Result::Failure; + } + } else { + return Result::AlreadyExists; + } + + // Write the record header. 
+  Record = llvm::make_unique<RecordState>(RecordPath.str());
+  llvm::BitstreamWriter &Stream = Record->Stream;
+  Stream.Emit('I', 8);
+  Stream.Emit('D', 8);
+  Stream.Emit('X', 8);
+  Stream.Emit('R', 8);
+
+  writeBlockInfo(Stream);
+  writeVersionInfo(Stream);
+
+  return Result::Success;
+}
+
+/// Finishes the record started by beginRecord.
+///
+/// Serializes the collected decls and occurrences (if any), then publishes the
+/// in-memory buffer at the record path by writing it to a uniquely named
+/// temporary file and renaming that file into place.
+///
+/// \param Error receives a description of the problem on failure.
+/// \param GetSymbolForDecl forwarded to writeDecls.
+/// \returns Result::Success, or Result::Failure if creating the directory,
+/// creating or writing the temporary file, or the rename fails.
+IndexRecordWriter::Result
+IndexRecordWriter::endRecord(std::string &Error,
+                             writer::SymbolWriterCallback GetSymbolForDecl) {
+  assert(Record && "called endRecord without calling beginRecord");
+  // Take ownership so the record state is released on every return path.
+  auto ScopedRecord = std::move(Record);
+  auto &State = *ScopedRecord;
+
+  if (!State.Decls.empty()) {
+    writeDecls(State.Stream, State.Decls, State.Occurrences, GetSymbolForDecl);
+  }
+
+  if (std::error_code EC =
+          sys::fs::create_directory(sys::path::parent_path(State.RecordPath))) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to create directory '"
+        << sys::path::parent_path(State.RecordPath) << "': " << EC.message();
+    return Result::Failure;
+  }
+
+  // Create a unique file to write to so that we can move the result into place
+  // atomically. If this process crashes we don't want to interfere with any
+  // other concurrent processes.
+  SmallString<128> TempPath(State.RecordPath);
+  TempPath += "-temp-%%%%%%%%";
+  int TempFD;
+  if (sys::fs::createUniqueFile(TempPath.str(), TempFD, TempPath)) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to create temporary file: " << TempPath;
+    return Result::Failure;
+  }
+
+  raw_fd_ostream OS(TempFD, /*shouldClose=*/true);
+  OS.write(State.Buffer.data(), State.Buffer.size());
+  OS.close();
+  // A pending stream error must be checked and cleared here: otherwise the
+  // raw_fd_ostream destructor reports a fatal error, and a truncated record
+  // must never be renamed into place.
+  if (OS.has_error()) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to write temporary file: " << TempPath;
+    OS.clear_error();
+    // Best-effort cleanup; the write failure is the error we report.
+    sys::fs::remove(TempPath);
+    return Result::Failure;
+  }
+
+  // Atomically move the unique file into place.
+  if (std::error_code EC =
+          sys::fs::rename(TempPath.c_str(), State.RecordPath.c_str())) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to rename '" << TempPath << "' to '" << State.RecordPath
+        << "': " << EC.message();
+    // Best-effort cleanup so failed attempts do not accumulate temp files.
+    sys::fs::remove(TempPath);
+    return Result::Failure;
+  }
+
+  return Result::Success;
+}
+
+/// Records one occurrence of \p D at \p Line / \p Column with \p Roles.
+///
+/// \p D and every related symbol are assigned a 1-based ID the first time they
+/// are seen; on later sightings the new roles are OR-ed into the roles already
+/// accumulated for that decl.
+void IndexRecordWriter::addOccurrence(
+    OpaqueDecl D, SymbolRoleSet Roles, unsigned Line, unsigned Column,
+    ArrayRef<SymbolRelation> Related) {
+  assert(Record && "called addOccurrence without calling beginRecord");
+  auto &State = *Record;
+
+  auto insertDecl = [&](OpaqueDecl D, SymbolRoleSet Roles,
+                        SymbolRoleSet RelatedRoles) -> unsigned {
+    auto Insert =
+        State.IndexForDecl.insert(std::make_pair(D, State.Decls.size()));
+    unsigned Index = Insert.first->second;
+
+    if (Insert.second) {
+      State.Decls.push_back(DeclInfo{D, Roles, RelatedRoles});
+    } else {
+      State.Decls[Index].Roles |= Roles;
+      State.Decls[Index].RelatedRoles |= RelatedRoles;
+    }
+    return Index + 1;
+  };
+
+  unsigned DeclID = insertDecl(D, Roles, SymbolRoleSet());
+
+  decltype(OccurrenceInfo::Related) RelatedDecls;
+  RelatedDecls.reserve(Related.size());
+  for (auto &Rel : Related) {
+    unsigned ID = insertDecl(Rel.RelatedSymbol, SymbolRoleSet(), Rel.Roles);
+    RelatedDecls.emplace_back(Rel, ID);
+  }
+
+  State.Occurrences.push_back(
+      OccurrenceInfo{DeclID, D, Roles, Line, Column, std::move(RelatedDecls)});
+}
Index: lib/Index/IndexUnitWriter.cpp
===================================================================
--- /dev/null
+++ lib/Index/IndexUnitWriter.cpp
@@ -0,0 +1,647 @@
+//===--- IndexUnitWriter.cpp - Index unit serialization -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "clang/Index/IndexUnitWriter.h" +#include "IndexDataStoreUtils.h" +#include "clang/Basic/FileManager.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Bitcode/BitstreamWriter.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace clang; +using namespace clang::index; +using namespace clang::index::store; +using namespace llvm; + +/// File path storage suitable for serialization. +/// +/// Maintains a deduplicated list of file paths, represented by their prefix +/// kind (working directory, system root, or none) and the offsets and lengths +/// of the remaining directory and filename components in a shared string +/// buffer. +class IndexUnitWriter::PathStorage { + std::string WorkDir; + std::string SysrootPath; + SmallString<512> PathsBuf; + StringMap Dirs; + std::vector FileBitPaths; + DenseMap FileToIndex; + +public: + PathStorage(StringRef workDir, StringRef sysrootPath) { + WorkDir = workDir; + if (sys::path::root_path(sysrootPath) == sysrootPath) + sysrootPath = StringRef(); + SysrootPath = sysrootPath; + } + + StringRef getPathsBuffer() const { return PathsBuf.str(); } + + ArrayRef getBitPaths() const { return FileBitPaths; } + + int getPathIndex(const FileEntry *FE) { + if (!FE) + return -1; + auto Pair = FileToIndex.insert(std::make_pair(FE, FileBitPaths.size())); + bool IsNew = Pair.second; + size_t Index = Pair.first->getSecond(); + + if (IsNew) { + StringRef Filename = sys::path::filename(FE->getName()); + DirBitPath Dir = getDirBitPath(sys::path::parent_path(FE->getName())); + FileBitPaths.emplace_back( + Dir.PrefixKind, Dir.Dir, + BitPathComponent(getPathOffset(Filename), Filename.size())); + } + return Index; + } + + 
size_t getPathOffset(StringRef Path) { + if (Path.empty()) + return 0; + size_t offset = PathsBuf.size(); + PathsBuf += Path; + return offset; + } + +private: + DirBitPath getDirBitPath(StringRef dirStr) { + auto pair = Dirs.insert(std::make_pair(dirStr, DirBitPath())); + bool isNew = pair.second; + auto &dirPath = pair.first->second; + + if (isNew) { + if (isPathInDir(SysrootPath, dirStr)) { + dirPath.PrefixKind = UNIT_PATH_PREFIX_SYSROOT; + dirStr = dirStr.drop_front(SysrootPath.size()); + while (!dirStr.empty() && dirStr[0] == '/') + dirStr = dirStr.drop_front(); + } else if (isPathInDir(WorkDir, dirStr)) { + dirPath.PrefixKind = UNIT_PATH_PREFIX_WORKDIR; + dirStr = dirStr.drop_front(WorkDir.size()); + while (!dirStr.empty() && dirStr[0] == '/') + dirStr = dirStr.drop_front(); + } + dirPath.Dir.Offset = getPathOffset(dirStr); + dirPath.Dir.Size = dirStr.size(); + } + return dirPath; + } + + static bool isPathInDir(StringRef dir, StringRef path) { + if (dir.empty() || !path.startswith(dir)) + return false; + StringRef rest = path.drop_front(dir.size()); + return !rest.empty() && sys::path::is_separator(rest.front()); + } +}; + +IndexUnitWriter::IndexUnitWriter( + FileManager &FileMgr, StringRef StorePath, StringRef ProviderIdentifier, + StringRef ProviderVersion, StringRef OutputFile, StringRef ModuleName, + const FileEntry *MainFile, bool IsSystem, bool IsModuleUnit, + bool IsDebugCompilation, StringRef TargetTriple, StringRef SysrootPath, + writer::ModuleInfoWriterCallback GetInfoForModule) + : FileMgr(FileMgr) { + this->UnitsPath = StorePath; + store::appendUnitSubDir(this->UnitsPath); + this->ProviderIdentifier = ProviderIdentifier; + this->ProviderVersion = ProviderVersion; + this->OutputFile = OutputFile; + this->ModuleName = ModuleName; + this->MainFile = MainFile; + this->IsSystemUnit = IsSystem; + this->IsModuleUnit = IsModuleUnit; + this->IsDebugCompilation = IsDebugCompilation; + this->TargetTriple = TargetTriple; + this->SysrootPath = SysrootPath; + 
this->GetInfoForModuleFn = GetInfoForModule; +} + +IndexUnitWriter::~IndexUnitWriter() {} + +int IndexUnitWriter::addModule(writer::OpaqueModule Mod) { + if (!Mod) + return -1; + + auto Pair = IndexByModule.insert(std::make_pair(Mod, Modules.size())); + bool WasInserted = Pair.second; + if (WasInserted) { + Modules.push_back(Mod); + } + return Pair.first->second; +} + +int IndexUnitWriter::addFileDependency(const FileEntry *File, bool IsSystem, + writer::OpaqueModule Mod) { + assert(File); + auto Pair = IndexByFile.insert(std::make_pair(File, Files.size())); + bool WasInserted = Pair.second; + if (WasInserted) { + Files.push_back(FileEntryData{File, IsSystem, addModule(Mod), {}}); + } + return Pair.first->second; +} + +void IndexUnitWriter::addRecordFile(StringRef RecordFile, const FileEntry *File, + bool IsSystem, writer::OpaqueModule Mod) { + int Dep = File ? addFileDependency(File, IsSystem, /*module=*/nullptr) : -1; + Records.push_back( + RecordOrUnitData{RecordFile, Dep, addModule(Mod), IsSystem}); +} + +void IndexUnitWriter::addASTFileDependency(const FileEntry *File, bool IsSystem, + writer::OpaqueModule Mod, + bool withoutUnitName) { + assert(File); + if (!SeenASTFiles.insert(File).second) + return; + + SmallString<64> UnitName; + if (!withoutUnitName) + getUnitNameForOutputFile(File->getName(), UnitName); + addUnitDependency(UnitName.str(), File, IsSystem, Mod); +} + +void IndexUnitWriter::addUnitDependency(StringRef UnitFile, + const FileEntry *File, bool IsSystem, + writer::OpaqueModule Mod) { + int Dep = File ? 
addFileDependency(File, IsSystem, /*module=*/nullptr) : -1; + ASTFileUnits.push_back( + RecordOrUnitData{UnitFile, Dep, addModule(Mod), IsSystem}); +} + +bool IndexUnitWriter::addInclude(const FileEntry *Source, unsigned Line, + const FileEntry *Target) { + // FIXME: This will ignore includes of headers that resolve to module imports + // because the 'target' header has not been added as a file dependency earlier + // so it is missing from \c IndexByFile. + + auto It = IndexByFile.find(Source); + if (It == IndexByFile.end()) + return false; + int SourceIndex = It->getSecond(); + It = IndexByFile.find(Target); + if (It == IndexByFile.end()) + return false; + int TargetIndex = It->getSecond(); + Files[SourceIndex].Includes.emplace_back(FileInclude{TargetIndex, Line}); + return true; +} + +void IndexUnitWriter::getUnitNameForOutputFile(StringRef FilePath, + SmallVectorImpl &Str) { + SmallString<256> AbsPath(FilePath); + FileMgr.makeAbsolutePath(AbsPath); + return getUnitNameForAbsoluteOutputFile(AbsPath, Str); +} + +void IndexUnitWriter::getUnitPathForOutputFile(StringRef FilePath, + SmallVectorImpl &Str) { + Str.append(UnitsPath.begin(), UnitsPath.end()); + auto Seperator = sys::path::get_separator(); + Str.append(Seperator.begin(), Seperator.end()); + return getUnitNameForOutputFile(FilePath, Str); +} + +Optional IndexUnitWriter::isUnitUpToDateForOutputFile( + StringRef FilePath, Optional TimeCompareFilePath, + std::string &Error) { + SmallString<256> UnitPath; + getUnitPathForOutputFile(FilePath, UnitPath); + + llvm::sys::fs::file_status UnitStat; + if (std::error_code EC = llvm::sys::fs::status(UnitPath.c_str(), UnitStat)) { + if (EC != llvm::errc::no_such_file_or_directory) { + llvm::raw_string_ostream Err(Error); + Err << "could not access path '" << UnitPath << "': " << EC.message(); + return None; + } + return false; + } + + if (!TimeCompareFilePath.hasValue()) + return true; + + llvm::sys::fs::file_status CompareStat; + if (std::error_code EC = + 
llvm::sys::fs::status(*TimeCompareFilePath, CompareStat)) { + if (EC != llvm::errc::no_such_file_or_directory) { + llvm::raw_string_ostream Err(Error); + Err << "could not access path '" << *TimeCompareFilePath + << "': " << EC.message(); + return None; + } + return true; + } + + // Return true (unit is up-to-date) if the file to compare is older than the + // unit file. + return CompareStat.getLastModificationTime() <= + UnitStat.getLastModificationTime(); +} + +void IndexUnitWriter::getUnitNameForAbsoluteOutputFile( + StringRef FilePath, SmallVectorImpl &Str) { + StringRef Fname = sys::path::filename(FilePath); + Str.append(Fname.begin(), Fname.end()); + Str.push_back('-'); + llvm::hash_code PathHashVal = llvm::hash_value(FilePath); + llvm::APInt(64, PathHashVal).toString(Str, 36, /*Signed=*/false); +} + +static void writeBlockInfo(BitstreamWriter &Stream) { + RecordData Record; + + Stream.EnterBlockInfoBlock(); +#define BLOCK(X) emitBlockID(X##_ID, #X, Stream, Record) +#define RECORD(X) emitRecordID(X, #X, Stream, Record) + + BLOCK(UNIT_VERSION_BLOCK); + RECORD(UNIT_VERSION); + + BLOCK(UNIT_INFO_BLOCK); + RECORD(UNIT_INFO); + + BLOCK(UNIT_DEPENDENCIES_BLOCK); + RECORD(UNIT_DEPENDENCY); + + BLOCK(UNIT_INCLUDES_BLOCK); + RECORD(UNIT_INCLUDE); + + BLOCK(UNIT_PATHS_BLOCK); + RECORD(UNIT_PATH); + RECORD(UNIT_PATH_BUFFER); + + BLOCK(UNIT_MODULES_BLOCK); + RECORD(UNIT_MODULE); + RECORD(UNIT_MODULE_BUFFER); + +#undef RECORD +#undef BLOCK + Stream.ExitBlock(); +} + +static void writeVersionInfo(BitstreamWriter &Stream) { + using namespace llvm::sys; + + Stream.EnterSubblock(UNIT_VERSION_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(UNIT_VERSION)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Store format version + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData Record; + Record.push_back(UNIT_VERSION); + Record.push_back(STORE_FORMAT_VERSION); + Stream.EmitRecordWithAbbrev(AbbrevCode, Record); 
+
+  Stream.ExitBlock();
+}
+
+/// Serializes the unit and publishes it at the unit path for OutputFile.
+///
+/// The bitstream is assembled in memory, written to a uniquely named temporary
+/// file in the parent directory of UnitsPath, and then renamed into place.
+///
+/// \param Error receives a description of the problem on failure.
+/// \returns true on failure, false on success.
+bool IndexUnitWriter::write(std::string &Error) {
+  using namespace llvm::sys;
+
+  // Determine the working directory.
+  SmallString<128> CWDPath;
+  if (!FileMgr.getFileSystemOpts().WorkingDir.empty()) {
+    CWDPath = FileMgr.getFileSystemOpts().WorkingDir;
+    if (!path::is_absolute(CWDPath)) {
+      // Do not silently continue with a relative working directory.
+      if (std::error_code EC = fs::make_absolute(CWDPath)) {
+        llvm::raw_string_ostream Err(Error);
+        Err << "failed to determine current working directory: "
+            << EC.message();
+        return true;
+      }
+    }
+  } else {
+    std::error_code EC = sys::fs::current_path(CWDPath);
+    if (EC) {
+      llvm::raw_string_ostream Err(Error);
+      Err << "failed to determine current working directory: " << EC.message();
+      return true;
+    }
+  }
+  WorkDir = CWDPath.str();
+
+  SmallString<512> Buffer;
+  BitstreamWriter Stream(Buffer);
+  Stream.Emit('I', 8);
+  Stream.Emit('D', 8);
+  Stream.Emit('X', 8);
+  Stream.Emit('U', 8);
+
+  PathStorage PathStore(WorkDir, SysrootPath);
+
+  writeBlockInfo(Stream);
+  writeVersionInfo(Stream);
+  writeUnitInfo(Stream, PathStore);
+  writeDependencies(Stream, PathStore);
+  writeIncludes(Stream, PathStore);
+  writePaths(Stream, PathStore);
+  writeModules(Stream);
+
+  SmallString<256> UnitPath;
+  getUnitPathForOutputFile(OutputFile, UnitPath);
+
+  SmallString<128> TempPath;
+  TempPath = path::parent_path(UnitsPath);
+  TempPath += path::get_separator();
+  TempPath += path::filename(UnitPath);
+  TempPath += "-%%%%%%%%";
+  int TempFD;
+  if (llvm::sys::fs::createUniqueFile(TempPath.str(), TempFD, TempPath)) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to create temporary file: " << TempPath;
+    return true;
+  }
+
+  raw_fd_ostream OS(TempFD, /*shouldClose=*/true);
+  OS.write(Buffer.data(), Buffer.size());
+  OS.close();
+  // A pending stream error must be checked and cleared here: otherwise the
+  // raw_fd_ostream destructor reports a fatal error, and a truncated unit must
+  // never be renamed into place.
+  if (OS.has_error()) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to write temporary file: " << TempPath;
+    OS.clear_error();
+    // Best-effort cleanup; the write failure is the error we report.
+    fs::remove(TempPath);
+    return true;
+  }
+
+  std::error_code EC = fs::rename(/*from=*/TempPath.c_str(),
+                                  /*to=*/UnitPath.c_str());
+  if (EC) {
+    llvm::raw_string_ostream Err(Error);
+    Err << "failed to rename '" << TempPath << "' to '" << UnitPath
+        << "': " << EC.message();
+    // Best-effort cleanup so failed attempts do not accumulate temp files.
+    fs::remove(TempPath);
+    return true;
+  }
+
+  return false;
+}
+
+void IndexUnitWriter::writeUnitInfo(llvm::BitstreamWriter &Stream,
+                                    PathStorage &PathStore) {
+
Stream.EnterSubblock(UNIT_INFO_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(UNIT_INFO)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystemUnit + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); // WorkDir offset + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // WorkDir size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); // OutputFile offset + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // OutputFile size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); // Sysroot offset + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Sysroot size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); // Main path id + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsDebugCompilation + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsModuleUnit + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); // Module name size + // ProviderIdentifier size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); // ProviderVersion size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 5)); // ProviderDataVersion + // Module name + ProviderIdentifier + ProviderVersion + target triple + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData Record; + Record.push_back(UNIT_INFO); + Record.push_back(IsSystemUnit); + Record.push_back(PathStore.getPathOffset(WorkDir)); + Record.push_back(WorkDir.size()); + Record.push_back(PathStore.getPathOffset(OutputFile)); + Record.push_back(OutputFile.size()); + Record.push_back(PathStore.getPathOffset(SysrootPath)); + Record.push_back(SysrootPath.size()); + // Make 1-based with 0=invalid + Record.push_back(PathStore.getPathIndex(MainFile) + 1); + Record.push_back(IsDebugCompilation); + Record.push_back(IsModuleUnit); + Record.push_back(ModuleName.size()); + 
Record.push_back(ProviderIdentifier.size()); + Record.push_back(ProviderVersion.size()); + // ProviderDataVersion is reserved. Not sure it is a good to idea to have + // clients consider the specifics of a 'provider data version', but reserving + // to avoid store format version change in case there is a use case in the + // future. + Record.push_back(0); // ProviderDataVersion + SmallString<128> InfoStrings; + InfoStrings += ModuleName; + InfoStrings += ProviderIdentifier; + InfoStrings += ProviderVersion; + InfoStrings += TargetTriple; + Stream.EmitRecordWithBlob(AbbrevCode, Record, InfoStrings); + + Stream.ExitBlock(); +} + +void IndexUnitWriter::writeDependencies(llvm::BitstreamWriter &Stream, + PathStorage &PathStore) { + std::vector FileUsedForRecordOrUnit; + FileUsedForRecordOrUnit.resize(Files.size()); + + Stream.EnterSubblock(UNIT_DEPENDENCIES_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(UNIT_DEPENDENCY)); + // Dependency kind + Abbrev->Add( + BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, UnitDependencyKindBitNum)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem + // PathIndex (1-based, 0 = none) + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); + // ModuleIndex (1-based, 0 = none) + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // time_t + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // file size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData Record; + + auto addRecordOrUnitData = [&](UnitDependencyKind K, + const RecordOrUnitData &Data) { + Record.push_back(UNIT_DEPENDENCY); + Record.push_back(K); + Record.push_back(Data.IsSystem); + if (Data.FileIndex != -1) { + Record.push_back(PathStore.getPathIndex(Files[Data.FileIndex].File) + 1); + FileUsedForRecordOrUnit[Data.FileIndex] = true; + } else { + Record.push_back(0); + } + 
if (Data.ModuleIndex != -1) { + Record.push_back(Data.ModuleIndex + 1); + } else { + Record.push_back(0); + } + if (Data.FileIndex != -1) { + Record.push_back(Files[Data.FileIndex].File->getModificationTime()); + Record.push_back(Files[Data.FileIndex].File->getSize()); + } else { + Record.push_back(0); + Record.push_back(0); + } + Stream.EmitRecordWithBlob(AbbrevCode, Record, Data.Name); + }; + + for (auto &ASTData : ASTFileUnits) { + Record.clear(); + addRecordOrUnitData(UNIT_DEPEND_KIND_UNIT, ASTData); + } + for (auto &recordData : Records) { + Record.clear(); + addRecordOrUnitData(UNIT_DEPEND_KIND_RECORD, recordData); + } + size_t FileIndex = 0; + for (auto &File : Files) { + if (FileUsedForRecordOrUnit[FileIndex++]) + continue; + Record.clear(); + Record.push_back(UNIT_DEPENDENCY); + Record.push_back(UNIT_DEPEND_KIND_FILE); + Record.push_back(File.IsSystem); + Record.push_back(PathStore.getPathIndex(File.File) + 1); + if (File.ModuleIndex != -1) { + Record.push_back(File.ModuleIndex + 1); + } else { + Record.push_back(0); + } + Record.push_back(File.File->getModificationTime()); + Record.push_back(File.File->getSize()); + Stream.EmitRecordWithBlob(AbbrevCode, Record, StringRef()); + } + + Stream.ExitBlock(); +} + +void IndexUnitWriter::writeIncludes(llvm::BitstreamWriter &Stream, + PathStorage &PathStore) { + Stream.EnterSubblock(UNIT_INCLUDES_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(UNIT_INCLUDE)); + // source path index (1-based, 0 = no path) + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); + // source include line + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 12)); + // target path index (1-based, 0 = no path) + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + RecordData Record; + + for (auto &Including : Files) { + for (auto &Included : Including.Includes) { + Record.clear(); + Record.push_back(UNIT_INCLUDE); + 
Record.push_back(PathStore.getPathIndex(Including.File) + 1); + Record.push_back(Included.Line); + Record.push_back(PathStore.getPathIndex(Files[Included.Index].File) + 1); + Stream.EmitRecordWithAbbrev(AbbrevCode, Record); + } + } + Stream.ExitBlock(); +} + +void IndexUnitWriter::writePaths(llvm::BitstreamWriter &Stream, + PathStorage &PathStore) { + Stream.EnterSubblock(UNIT_PATHS_BLOCK_ID, 3); + + auto PathAbbrev = std::make_shared(); + PathAbbrev->Add(BitCodeAbbrevOp(UNIT_PATH)); + // Path prefix kind + PathAbbrev->Add( + BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, UnitFilePathPrefixKindBitNum)); + PathAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); // DirPath offset + PathAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // DirPath size + PathAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 10)); // Filename offset + PathAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Filename size + unsigned PathAbbrevCode = Stream.EmitAbbrev(std::move(PathAbbrev)); + + auto PathBufferAbbrev = std::make_shared(); + PathBufferAbbrev->Add(BitCodeAbbrevOp(UNIT_PATH_BUFFER)); + PathBufferAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Paths buffer + unsigned PathBufferAbbrevCode = Stream.EmitAbbrev(PathBufferAbbrev); + + RecordData Record; + for (auto &BitPath : PathStore.getBitPaths()) { + Record.push_back(UNIT_PATH); + Record.push_back(BitPath.PrefixKind); + Record.push_back(BitPath.Dir.Offset); + Record.push_back(BitPath.Dir.Size); + Record.push_back(BitPath.Filename.Offset); + Record.push_back(BitPath.Filename.Size); + Stream.EmitRecordWithAbbrev(PathAbbrevCode, Record); + Record.clear(); + } + + Record.push_back(UNIT_PATH_BUFFER); + Stream.EmitRecordWithBlob(PathBufferAbbrevCode, Record, + PathStore.getPathsBuffer()); + + Stream.ExitBlock(); +} + +void IndexUnitWriter::writeModules(llvm::BitstreamWriter &Stream) { + Stream.EnterSubblock(UNIT_MODULES_BLOCK_ID, 3); + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(UNIT_MODULE)); + 
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 9)); // Module name offset + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Module name size + unsigned AbbrevCode = Stream.EmitAbbrev(std::move(Abbrev)); + + auto BufferAbbrev = std::make_shared(); + BufferAbbrev->Add(BitCodeAbbrevOp(UNIT_MODULE_BUFFER)); + // Module names buffer + BufferAbbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned BufferAbbrevCode = Stream.EmitAbbrev(BufferAbbrev); + + SmallString<512> ModuleNamesBuf; + + RecordData Record; + for (auto &Mod : Modules) { + SmallString<64> ModuleName; + StringRef name = GetInfoForModuleFn(Mod, ModuleName).Name; + size_t offset = ModuleNamesBuf.size(); + ModuleNamesBuf += name; + + Record.push_back(UNIT_MODULE); + Record.push_back(offset); + Record.push_back(name.size()); + Stream.EmitRecordWithAbbrev(AbbrevCode, Record); + Record.clear(); + } + + Record.push_back(UNIT_MODULE_BUFFER); + Stream.EmitRecordWithBlob(BufferAbbrevCode, Record, ModuleNamesBuf.str()); + + Stream.ExitBlock(); +} + +bool IndexUnitWriter::initIndexDirectory(StringRef StorePath, + std::string &Error) { + using namespace llvm::sys; + SmallString<128> SubPath = StorePath; + store::appendRecordSubDir(SubPath); + std::error_code EC = fs::create_directories(SubPath); + if (EC) { + llvm::raw_string_ostream Err(Error); + Err << "failed to create directory '" << SubPath << "': " << EC.message(); + return true; + } + + SubPath = StorePath; + store::appendUnitSubDir(SubPath); + EC = fs::create_directory(SubPath); + if (EC) { + llvm::raw_string_ostream Err(Error); + Err << "failed to create directory '" << SubPath << "': " << EC.message(); + return true; + } + + return false; +} Index: lib/Index/IndexingAction.cpp =================================================================== --- lib/Index/IndexingAction.cpp +++ lib/Index/IndexingAction.cpp @@ -8,7 +8,9 @@ //===----------------------------------------------------------------------===// #include 
"clang/Index/IndexingAction.h" +#include "ClangIndexRecordWriter.h" #include "FileIndexRecord.h" +#include "IndexDataStoreUtils.h" #include "IndexingContext.h" #include "clang/Basic/FileManager.h" #include "clang/Frontend/CompilerInstance.h" @@ -18,6 +20,7 @@ #include "clang/Frontend/Utils.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexDiagnostic.h" +#include "clang/Index/IndexUnitWriter.h" #include "clang/Lex/Preprocessor.h" #include "clang/Serialization/ASTReader.h" @@ -537,6 +540,19 @@ } // anonymous namespace +static std::string getClangVersion() { + // Try picking the version from an Apple Clang tag. + std::string RepositoryPath = getClangRepositoryPath(); + StringRef BuildNumber = RepositoryPath; + size_t DashOffset = BuildNumber.find('-'); + if (BuildNumber.startswith("clang") && DashOffset != StringRef::npos) { + BuildNumber = BuildNumber.substr(DashOffset + 1); + return BuildNumber; + } + // Fallback to the generic version. + return CLANG_VERSION_STRING; +} + static void writeUnitData(const CompilerInstance &CI, IndexDataRecorder &Recorder, IndexDependencyProvider &DepProvider, @@ -562,7 +578,15 @@ } diagClientBeginEndRAII(CI); SourceManager &SM = CI.getSourceManager(); + DiagnosticsEngine &Diag = CI.getDiagnostics(); HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo(); + StringRef DataPath = RecordOpts.DataDirPath; + + std::string Error; + if (IndexUnitWriter::initIndexDirectory(DataPath, Error)) { + Diag.Report(diag::err_index_store_dir_create_failed) << Error; + return; + } std::string OutputFile = CI.getFrontendOpts().OutputFile; if (OutputFile.empty()) { @@ -586,6 +610,14 @@ OutputFile, RootFile, UnitMod, IsSystemCache.getSysrootPath()); } +/// Checks if the unit file exists for module file, if it doesn't it generates +/// index data for it. 
+static bool produceIndexDataForModuleFile(serialization::ModuleFile &Mod, + const CompilerInstance &CI, + IndexingOptions IndexOpts, + RecordingOptions RecordOpts, + IndexUnitWriter &ParentUnitWriter); + static void writeUnitData(const CompilerInstance &CI, IndexDataRecorder &Recorder, IndexDependencyProvider &DepProvider, @@ -594,7 +626,197 @@ const FileEntry *RootFile, Module *UnitModule, StringRef SysrootPath) { - // TODO persist collected index data + SourceManager &SM = CI.getSourceManager(); + DiagnosticsEngine &Diag = CI.getDiagnostics(); + HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo(); + StringRef DataPath = RecordOpts.DataDirPath; + bool IsSystemUnit = UnitModule ? UnitModule->IsSystem : false; + bool IsModuleUnit = UnitModule != nullptr; + bool IsDebugCompilation = CI.getCodeGenOpts().OptimizationLevel == 0; + std::string ModuleName = + UnitModule ? UnitModule->getFullModuleName() : std::string(); + + auto getModuleInfo = + [](writer::OpaqueModule mod, + SmallVectorImpl &Scratch) -> writer::ModuleInfo { + assert(mod); + writer::ModuleInfo info; + std::string fullName = + static_cast(mod)->getFullModuleName(); + unsigned offset = Scratch.size(); + Scratch.append(fullName.begin(), fullName.end()); + info.Name = StringRef(Scratch.data() + offset, fullName.size()); + return info; + }; + + auto findModuleForHeader = [&](const FileEntry *FE) -> Module * { + if (!UnitModule) + return nullptr; + if (auto Mod = HS.findModuleForHeader(FE).getModule()) + if (Mod->isSubModuleOf(UnitModule)) + return Mod; + return nullptr; + }; + + IndexUnitWriter UnitWriter( + CI.getFileManager(), DataPath, "clang", getClangVersion(), OutputFile, + ModuleName, RootFile, IsSystemUnit, IsModuleUnit, IsDebugCompilation, + CI.getTargetOpts().Triple, SysrootPath, getModuleInfo); + + DepProvider.visitFileDependencies( + CI, [&](const FileEntry *FE, bool isSystemFile) { + UnitWriter.addFileDependency(FE, isSystemFile, findModuleForHeader(FE)); + }); + 
DepProvider.visitIncludes( + [&](const FileEntry *Source, unsigned Line, const FileEntry *Target) { + UnitWriter.addInclude(Source, Line, Target); + }); + DepProvider.visitModuleImports(CI, [&](serialization::ModuleFile &Mod, + bool isSystemMod) { + Module *UnitMod = HS.lookupModule(Mod.ModuleName, /*AllowSearch=*/false); + UnitWriter.addASTFileDependency(Mod.File, isSystemMod, UnitMod); + if (Mod.isModule()) { + produceIndexDataForModuleFile(Mod, CI, IndexOpts, RecordOpts, UnitWriter); + } + }); + + ClangIndexRecordWriter RecordWriter(CI.getASTContext(), RecordOpts); + for (auto I = Recorder.record_begin(), E = Recorder.record_end(); I != E; + ++I) { + FileID FID = I->first; + const FileIndexRecord &Rec = *I->second; + const FileEntry *FE = SM.getFileEntryForID(FID); + std::string RecordFile; + std::string Error; + + if (RecordWriter.writeRecord(FE->getName(), Rec, Error, &RecordFile)) { + Diag.Report(diag::err_index_store_record_write_failed) + << RecordFile << Error; + return; + } + UnitWriter.addRecordFile(RecordFile, FE, Rec.isSystem(), + findModuleForHeader(FE)); + } + + std::string Error; + if (UnitWriter.write(Error)) { + Diag.Report(diag::err_index_store_unit_write_failed) << Error; + return; + } +} + +namespace { +/// Collects and provides the file and module dependency information for the +/// supplied \c ModuleFile +class ModuleFileIndexDependencyCollector : public IndexDependencyProvider { + serialization::ModuleFile &ModFile; + bool RecordSystemDependencies; + +public: + ModuleFileIndexDependencyCollector(serialization::ModuleFile &Mod, + bool RecordSystemDependencies) + : ModFile(Mod), RecordSystemDependencies(RecordSystemDependencies) {} + + void visitFileDependencies( + const CompilerInstance &CI, + llvm::function_ref visitor) + override { + auto Reader = CI.getModuleManager(); + Reader->visitInputFiles( + ModFile, RecordSystemDependencies, + /*Complain=*/false, + [&](const serialization::InputFile &IF, bool isSystem) { + auto *FE = IF.getFile(); + 
if (!FE) + return; + // Ignore module map files, they are not as important to track as + // source files and they may be auto-generated which would create an + // undesirable dependency on an intermediate build byproduct. + if (FE->getName().endswith("module.modulemap")) + return; + + visitor(FE, isSystem); + }); + } + + void + visitIncludes(llvm::function_ref + visitor) override { + // FIXME: Module files without a preprocessing record do not have info about + // include locations. Serialize enough data to be able to retrieve such + // info. + } + + void visitModuleImports( + const CompilerInstance &CI, + llvm::function_ref + visitor) override { + HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo(); + for (auto *Mod : ModFile.Imports) { + bool isSystemMod = false; + if (auto *M = HS.lookupModule(Mod->ModuleName, /*AllowSearch=*/false)) + isSystemMod = M->IsSystem; + if (!isSystemMod || RecordSystemDependencies) + visitor(*Mod, isSystemMod); + } + } +}; +} // anonymous namespace. 
+
+/// Indexes the top-level declarations of the module file \p Mod and writes the
+/// resulting index data through writeUnitData, using the module file name as
+/// the output file and no root file.
+static void indexModule(serialization::ModuleFile &Mod,
+                        const CompilerInstance &CI, IndexingOptions IndexOpts,
+                        RecordingOptions RecordOpts) {
+  DiagnosticsEngine &Diag = CI.getDiagnostics();
+  Diag.Report(Mod.ImportLoc, diag::remark_index_producing_module_file_data)
+      << Mod.FileName;
+
+  StringRef SysrootPath = CI.getHeaderSearchOpts().Sysroot;
+  HeaderSearch &HS = CI.getPreprocessor().getHeaderSearchInfo();
+  Module *UnitMod = HS.lookupModule(Mod.ModuleName, /*AllowSearch=*/false);
+  assert(UnitMod && "only loaded modules should be indexed");
+
+  IndexDataRecorder Recorder;
+  IsSystemFileCache IsSystemCache(SysrootPath);
+  IndexingContext IndexCtx(IndexOpts, Recorder, IsSystemCache);
+
+  IndexCtx.setASTContext(CI.getASTContext());
+  Recorder.init(CI.getPreprocessor(), CI.getASTContext());
+
+  for (const Decl *D : CI.getModuleManager()->getModuleFileLevelDecls(Mod))
+    IndexCtx.indexTopLevelDecl(D);
+
+  Recorder.finish();
+
+  ModuleFileIndexDependencyCollector DepCollector(
+      Mod, RecordOpts.RecordSystemDependencies);
+  writeUnitData(CI, Recorder, DepCollector, IndexOpts, RecordOpts, Mod.FileName,
+                /*RootFile=*/nullptr, UnitMod, SysrootPath);
+}
+
+static bool produceIndexDataForModuleFile(serialization::ModuleFile &Mod,
+                                          const CompilerInstance &CI,
+                                          IndexingOptions IndexOpts,
+                                          RecordingOptions RecordOpts,
+                                          IndexUnitWriter &ParentUnitWriter) {
+  DiagnosticsEngine &Diag = CI.getDiagnostics();
+  std::string Error;
+  // We don't do timestamp check with the PCM file, on purpose. The PCM may get
+  // touched for various reasons which would cause unnecessary work to emit
+  // index data. User modules normally will get rebuilt and their index data
+  // re-emitted, and system modules are generally stable (and they can also
+  // get rebuilt along with their index data).
+  auto IsUptodateOpt =
+      ParentUnitWriter.isUnitUpToDateForOutputFile(Mod.FileName, None, Error);
+  if (!IsUptodateOpt.hasValue()) {
+    Diag.Report(diag::err_index_store_file_status_failed) << Error;
+    return false;
+  }
+  if (*IsUptodateOpt)
+    return false;
+
+  indexModule(Mod, CI, IndexOpts, RecordOpts);
+  return true;
 }
 
 static std::unique_ptr
@@ -629,3 +851,20 @@
                                         IndexAndRecordOpts.second,
                                         std::move(WrappedAction));
 }
+
+/// Looks up the loaded module file for \p Mod and produces index data for it
+/// unless a unit for that module file already exists; true if data was emitted.
+bool index::emitIndexDataForModuleFile(const Module *Mod,
+                                       const CompilerInstance &CI,
+                                       IndexUnitWriter &ParentUnitWriter) {
+  auto IndexAndRecordOpts =
+      getIndexOptionsFromFrontendOptions(CI.getFrontendOpts());
+
+  auto astReader = CI.getModuleManager();
+  serialization::ModuleFile *ModFile =
+      astReader->getModuleManager().lookup(Mod->getASTFile());
+  assert(ModFile && "no module file loaded for module ?");
+  return produceIndexDataForModuleFile(*ModFile, CI, IndexAndRecordOpts.first,
+                                       IndexAndRecordOpts.second,
+                                       ParentUnitWriter);
+}
Index: test/Index/Store/record-hash-using.cpp
===================================================================
--- /dev/null
+++ test/Index/Store/record-hash-using.cpp
@@ -0,0 +1,46 @@
+// REQUIRES: shell
+
+// RUN: rm -rf %t
+// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=A -DTYPE2=A -DTYPE3=T -DTYPE4=T
+// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=B -DTYPE2=A -DTYPE3=T -DTYPE4=T
+// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=A -DTYPE2=B -DTYPE3=T -DTYPE4=T
+// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=B -DTYPE2=B -DTYPE3=T -DTYPE4=T
+// RUN: find %t/idx/*/records -name "record-hash*" | count 4
+//
+// RUN: rm -rf %t
+// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=A -DTYPE2=A -DTYPE3=T -DTYPE4=T
+// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=A -DTYPE2=A -DTYPE3=U -DTYPE4=T
+// RUN: %clang_cc1 %s -std=c++11
-index-store-path %t/idx -DTYPE1=A -DTYPE2=A -DTYPE3=T -DTYPE4=U +// RUN: %clang_cc1 %s -std=c++11 -index-store-path %t/idx -DTYPE1=A -DTYPE2=A -DTYPE3=U -DTYPE4=U +// RUN: find %t/idx/*/records -name "record-hash*" | count 4 + +template +struct A { + typedef int X; + void foo(); +}; + +template +struct B : public A { + typedef float X; + void foo(int); +}; + +template +struct C : public B { +// This should result in different records, due to the different types. + using TYPE1::X; + using TYPE2::foo; +}; + +template +struct D { + typedef T X; + void foo(T); +}; +template +struct E : public D, public D { +// This should result in different records, due to the different template parameter. + using D::X; + using D::foo; +}; Index: test/Index/Store/record-hash.cpp =================================================================== --- /dev/null +++ test/Index/Store/record-hash.cpp @@ -0,0 +1,12 @@ +// REQUIRES: shell + +// RUN: rm -rf %t +// RUN: %clang_cc1 %s -index-store-path %t/idx -D THE_TYPE=long +// RUN: %clang_cc1 %s -index-store-path %t/idx -D THE_TYPE=char +// RUN: find %t/idx/*/records -name "record-hash*" | count 2 + +template +class TC {}; + +// This should result in different records, due to the different template parameter type. +void some_func(TC); Index: test/Index/Store/relative-out-path.c =================================================================== --- /dev/null +++ test/Index/Store/relative-out-path.c @@ -0,0 +1,19 @@ +// Needs 'find'. +// REQUIRES: shell + +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: %clang %s -index-store-path %t/idx1 -c -o %t/outfile.o +// RUN: cd %t +// RUN: %clang %s -index-store-path %t/idx2 -c -o outfile.o +// RUN: cd .. 
+// RUN: %clang %s -index-store-path %t/idx3 -fsyntax-only -o outfile.o -working-directory=%t +// RUN: diff -r -u %t/idx2 %t/idx3 + +// RUN: find %t/idx1 -name '*outfile.o*' > %t/hashes.txt +// RUN: find %t/idx3 -name '*outfile.o*' >> %t/hashes.txt +// RUN: FileCheck %s --input-file=%t/hashes.txt +// CHECK: outfile.o[[OUT_HASH:.*$]] +// CHECK-NEXT: outfile.o[[OUT_HASH]] + +void foo();