diff --git a/lldb/include/lldb/Core/DataFileCache.h b/lldb/include/lldb/Core/DataFileCache.h
new file mode 100644
--- /dev/null
+++ b/lldb/include/lldb/Core/DataFileCache.h
@@ -0,0 +1,216 @@
+//===-- DataFileCache.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_CORE_DATAFILECACHE_H
+#define LLDB_CORE_DATAFILECACHE_H
+
+#include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/Status.h"
+#include "lldb/Utility/UUID.h"
+#include "lldb/lldb-forward.h"
+#include "llvm/Support/Caching.h"
+#include <mutex>
+
+namespace lldb_private {
+
+/// This class enables data to be cached into a directory using the llvm
+/// caching code. Data can be stored and accessed using a unique string key.
+/// The data will be stored in the directory that is specified in the
+/// DataFileCache constructor. The data will be stored in files that start with
+/// "llvmcache-<key>" where <key> is the key name specified when getting or
+/// setting cached data.
+///
+/// Sample code for how to use the cache:
+///
+///   DataFileCache cache("/tmp/lldb-test-cache");
+///   StringRef key("Key1");
+///   auto mem_buffer_up = cache.GetCachedData(key);
+///   if (mem_buffer_up) {
+///     printf("cached data:\n%s", mem_buffer_up->getBufferStart());
+///   } else {
+///     std::vector<uint8_t> data = { 'h', 'e', 'l', 'l', 'o', '\n' };
+///     cache.SetCachedData(key, data);
+///   }
+
+class DataFileCache {
+public:
+  /// Create a data file cache in the directory path that is specified.
+  ///
+  /// Data will be cached in files created in this directory when clients call
+  /// DataFileCache::SetCachedData.
+  DataFileCache(llvm::StringRef path);
+
+  /// Get cached data from the cache directory for the specified key.
+  ///
+  /// Keys must be unique for any given data. This function attempts to see if
+  /// the data is available for the specified key and will return a valid memory
+  /// buffer if data is available.
+  ///
+  /// \param key
+  ///   The unique string key that identifies data being cached.
+  ///
+  /// \return
+  ///   A valid unique pointer to a memory buffer if the data is available, or
+  ///   a unique pointer that contains NULL if the data is not available.
+  std::unique_ptr<llvm::MemoryBuffer> GetCachedData(llvm::StringRef key);
+
+  /// Set cached data for the specified key.
+  ///
+  /// Setting the cached data will save a file in the cache directory to contain
+  /// the specified data.
+  ///
+  /// \param key
+  ///   The unique string key that identifies data being cached.
+  ///
+  /// \param data
+  ///   The bytes to store in the cache file for \a key.
+  ///
+  /// \return
+  ///   True if the data was successfully cached, false otherwise. False is
+  ///   also returned when the cache already holds data for \a key, in which
+  ///   case nothing is written.
+  bool SetCachedData(llvm::StringRef key, llvm::ArrayRef<uint8_t> data);
+
+  /// Remove the cache file associated with the key.
+  Status RemoveCacheFile(llvm::StringRef key);
+
+private:
+  /// Return the cache file that is associated with the key.
+  FileSpec GetCacheFilePath(llvm::StringRef key);
+
+  /// The callback obtained from llvm::localCache() in the constructor. It is
+  /// left empty if the cache could not be created.
+  llvm::FileCache m_cache_callback;
+  /// The directory that contains the cache files.
+  FileSpec m_cache_dir;
+  /// Held for the duration of GetCachedData() and SetCachedData().
+  std::mutex m_mutex;
+  /// Receives the buffer handed to the constructor's "add_buffer" lambda
+  /// while m_take_ownership is true.
+  std::unique_ptr<llvm::MemoryBuffer> m_mem_buff_up;
+  /// Set to true only while GetCachedData() invokes m_cache_callback so that
+  /// the "add_buffer" lambda knows it should keep the buffer it is given.
+  bool m_take_ownership = false;
+};
+
+/// A signature for a given file on disk.
+///
+/// Any files that are cached in the LLDB index cache need some data that
+/// uniquely identifies a file on disk and this information should be written
+/// into each cache file so we can validate if the cache file still matches
+/// the file we are trying to load cached data for. Objects can fill out this
+/// signature and then encode and decode them to validate the signatures
+/// match. If they do not match, the cache file on disk should be removed as
+/// it is out of date.
+struct CacheSignature {
+  /// UUID of object file or module.
+  llvm::Optional<UUID> m_uuid = llvm::None;
+  /// Modification time of file on disk.
+  llvm::Optional<std::time_t> m_mod_time = llvm::None;
+  /// If this describes a .o file with a BSD archive, the BSD archive's
+  /// modification time will be in m_mod_time, and the .o file's modification
+  /// time will be in this m_obj_mod_time.
+  llvm::Optional<std::time_t> m_obj_mod_time = llvm::None;
+
+  CacheSignature() = default;
+
+  /// Create a signature from a module.
+  CacheSignature(lldb_private::Module *module);
+
+  /// Create a signature from an object file.
+  CacheSignature(lldb_private::ObjectFile *objfile);
+
+  /// Reset all of the signature member variables so none has a value.
+  void Clear() {
+    m_uuid = llvm::None;
+    m_mod_time = llvm::None;
+    m_obj_mod_time = llvm::None;
+  }
+
+  /// Return true if any of the signature member variables have valid values.
+  bool IsValid() const {
+    return m_uuid.hasValue() || m_mod_time.hasValue() ||
+           m_obj_mod_time.hasValue();
+  }
+
+  /// Check if two signatures differ.
+  ///
+  /// \return
+  ///   True if any of the UUID, the modification time or the object
+  ///   modification time differ between the two signatures, false if they
+  ///   are all the same.
+  bool operator!=(const CacheSignature &rhs) const {
+    if (m_uuid != rhs.m_uuid)
+      return true;
+    if (m_mod_time != rhs.m_mod_time)
+      return true;
+    if (m_obj_mod_time != rhs.m_obj_mod_time)
+      return true;
+    return false;
+  }
+
+  /// Encode this object into a data encoder object.
+  ///
+  /// This allows this object to be serialized to disk. The CacheSignature
+  /// object must have at least one member variable that has a value in order to
+  /// be serialized so that we can match this data to when the cached file is
+  /// loaded at a later time.
+  ///
+  /// \param encoder
+  ///   A data encoder object that serialized bytes will be encoded into.
+  ///
+  /// \return
+  ///   True if a signature was encoded, and false if there were no member
+  ///   variables that had value. False indicates this data should not be
+  ///   cached to disk because we were unable to encode a valid signature.
+  bool Encode(DataEncoder &encoder);
+
+  /// Decode a serialized version of this object from data.
+  ///
+  /// \param data
+  ///   The decoder object that references the serialized data.
+  ///
+  /// \param offset_ptr
+  ///   A pointer that contains the offset from which the data will be
+  ///   decoded. The offset gets updated as data gets decoded.
+  ///
+  /// \return
+  ///   True if the signature was successfully decoded, false otherwise.
+  bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
+};
+
+/// Many cache files require string tables to store data efficiently. This
+/// class helps create string tables.
+class ConstStringTable {
+public:
+  ConstStringTable() = default;
+  /// Add a string into the string table.
+  ///
+  /// Adding a string to the string table will only add the same string one
+  /// time and will return the offset in the string table buffer to that string.
+  /// String tables are easy to build with ConstString objects since most LLDB
+  /// classes for symbol or debug info use them already and they provide
+  /// permanent storage for the string.
+  ///
+  /// \param s
+  ///   The string to insert into the string table.
+  ///
+  /// \return
+  ///   The byte offset from the start of the string table for the inserted
+  ///   string. Duplicate strings that get inserted will return the same
+  ///   byte offset.
+  uint32_t Add(ConstString s);
+
+  /// Encode the string table into a data encoder object.
+  ///
+  /// The table is written as a 4 character identifier, followed by the byte
+  /// size of the string data as a 32 bit value, followed by the string data
+  /// itself. The string data starts with an empty string and contains each
+  /// added string as a C string.
+  ///
+  /// \param encoder
+  ///   A data encoder object that serialized bytes will be encoded into.
+  ///
+  /// \return
+  ///   Always true.
+  bool Encode(DataEncoder &encoder);
+
+private:
+  /// The strings in the order in which they were added.
+  std::vector<ConstString> m_strings;
+  /// Maps each added string to the offset that Add() handed out for it.
+  std::map<ConstString, uint32_t> m_string_to_offset;
+  /// Skip one byte to start the string table off with an empty string.
+  uint32_t m_next_offset = 1;
+};
+
+/// Many cache files require string tables to store data efficiently. This
+/// class helps give out strings from a string table that was read from a
+/// cache file.
+class StringTableReader {
+public:
+  StringTableReader() = default;
+
+  /// Get the string that starts at a string table offset.
+  ///
+  /// \param offset
+  ///   The byte offset from the start of the string table.
+  ///
+  /// \return
+  ///   The string found at \a offset, or an empty string if \a offset is not
+  ///   within the string table.
+  llvm::StringRef Get(uint32_t offset) const;
+
+  /// Decode a string table from data.
+  ///
+  /// \param data
+  ///   The decoder object that references the serialized data.
+  ///
+  /// \param offset_ptr
+  ///   A pointer that contains the offset from which the data will be
+  ///   decoded. The offset gets updated as data gets decoded.
+  ///
+  /// \return
+  ///   True if the string table was successfully decoded, false if the
+  ///   string table identifier did not match, the string table was empty or
+  ///   the string data was not available.
+  bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr);
+
+protected:
+  /// All of the strings in the string table are contained in m_data.
+  llvm::StringRef m_data;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_CORE_DATAFILECACHE_H
diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h
--- a/lldb/include/lldb/Core/Mangled.h
+++ b/lldb/include/lldb/Core/Mangled.h
@@ -12,9 +12,8 @@

 #include "lldb/lldb-enumerations.h"
 #include "lldb/lldb-forward.h"
-
+#include "lldb/lldb-types.h"
 #include "lldb/Utility/ConstString.h"
-
 #include "llvm/ADT/StringRef.h"

 #include <cstddef>
@@ -64,6 +63,15 @@

   explicit Mangled(llvm::StringRef name);

+  bool operator==(const Mangled &rhs) const {
+    return m_mangled == rhs.m_mangled &&
+           GetDemangledName() == rhs.GetDemangledName();
+  }
+
+  bool operator!=(const Mangled &rhs) const {
+    return !(*this == rhs);
+  }
+
   /// Convert to pointer operator.
   ///
   /// This allows code to check a Mangled object to see if it contains a valid
@@ -270,6 +278,35 @@
   /// for s, otherwise the enumerator for the mangling scheme detected.
   static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef const name);

+  /// Decode a serialized version of this object from data.
+  ///
+  /// \param data
+  ///   The decoder object that references the serialized data.
+  ///
+  /// \param offset_ptr
+  ///   A pointer that contains the offset from which the data will be decoded
+  ///   from that gets updated as data gets decoded.
+  ///
+  /// \param strtab
+  ///   All strings in cache files are put into string tables for efficiency
+  ///   and cache file size reduction.
Strings are stored as uint32_t string + /// table offsets in the cache data. + void Encode(DataEncoder &encoder, ConstStringTable &strtab) const; + private: /// Mangled member variables. ConstString m_mangled; ///< The mangled version of the name diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -945,6 +945,43 @@ bool m_match_name_after_lookup = false; }; + /// Get a unique hash for this module. + /// + /// The hash should be enough to identify the file on disk and the + /// architecture of the file. If the module represents an object inside of a + /// file, then the hash should include the object name and object offset to + /// ensure a unique hash. Some examples: + /// - just a regular object file (mach-o, elf, coff, etc) should create a hash + /// - a universal mach-o file that contains multiple architectures, + /// each architecture slice should have a unique hash even though they come + /// from the same file + /// - a .o file inside of a BSD archive. Each .o file will have an object name + /// and object offset that should produce a unique hash. The object offset + /// is needed as BSD archive files can contain multiple .o files that have + /// the same name. + uint32_t Hash(); + + /// Get a unique cache key for the current module. + /// + /// The cache key must be unique for a file on disk and not change if the file + /// is updated. This allows cache data to use this key as a prefix and as + /// files are modified on disk, we will overwrite the cache files. If one file + /// can contain multiple files, like a universal mach-o file or like a BSD + /// archive, the cache key must contain enough information to differentiate + /// these different files. + std::string GetCacheKey(); + + /// Get the global index file cache. + /// + /// LLDB can cache data for a module between runs. 
This cache directory can be + /// used to store data that previously was manually created each time you debug. + /// Examples include debug information indexes, symbol tables, symbol table + /// indexes, and more. + /// + /// \returns + /// If caching is enabled in the lldb settings, return a pointer to the data + /// file cache. If caching is not enabled, return NULL. + static DataFileCache *GetIndexCache(); protected: // Member Variables mutable std::recursive_mutex m_mutex; ///< A mutex to keep this object happy diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h --- a/lldb/include/lldb/Core/ModuleList.h +++ b/lldb/include/lldb/Core/ModuleList.h @@ -60,6 +60,13 @@ bool SetClangModulesCachePath(const FileSpec &path); bool GetEnableExternalLookup() const; bool SetEnableExternalLookup(bool new_value); + bool GetEnableLLDBIndexCache() const; + bool SetEnableLLDBIndexCache(bool new_value); + uint64_t GetLLDBIndexCacheMaxByteSize(); + uint64_t GetLLDBIndexCacheMaxPercent(); + uint64_t GetLLDBIndexCacheExpirationDays(); + FileSpec GetLLDBIndexCachePath() const; + bool SetLLDBIndexCachePath(const FileSpec &path); PathMappingList GetSymlinkMappings() const; }; diff --git a/lldb/include/lldb/Host/FileSystem.h b/lldb/include/lldb/Host/FileSystem.h --- a/lldb/include/lldb/Host/FileSystem.h +++ b/lldb/include/lldb/Host/FileSystem.h @@ -142,6 +142,14 @@ void Resolve(FileSpec &file_spec); /// \} + /// Remove a single file. + /// + /// The path must specify a file and not a directory. + /// \{ + Status RemoveFile(const FileSpec &file_spec); + Status RemoveFile(const llvm::Twine &path); + /// \} + //// Create memory buffer from path. 
/// \{ std::shared_ptr CreateDataBuffer(const llvm::Twine &path, diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -19,6 +19,7 @@ #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/UUID.h" #include "lldb/lldb-private.h" +#include "llvm/ADT/Optional.h" #include "llvm/Support/Threading.h" #include "llvm/Support/VersionTuple.h" @@ -707,6 +708,15 @@ return false; } + /// Get a hash that can be used for caching object file related information. + /// + /// Data for object files can be cached between runs of debug sessions and + /// a module can end up using a main file and a symbol file, both of which + /// can be object files. So we need a unique hash that identifies an object + /// file when storing cached data. + uint32_t GetCacheHash(); + + protected: // Member variables. FileSpec m_file; @@ -729,6 +739,7 @@ /// need to use a std::unique_ptr to a llvm::once_flag so if we clear the /// symbol table, we can have a new once flag to use when it is created again. std::unique_ptr m_symtab_once_up; + llvm::Optional m_cache_hash; /// Sets the architecture for a module. At present the architecture can /// only be set if it is invalid. It is not allowed to switch from one diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h --- a/lldb/include/lldb/Symbol/Symbol.h +++ b/lldb/include/lldb/Symbol/Symbol.h @@ -235,6 +235,46 @@ return "___lldb_unnamed_symbol"; } + /// Decode a serialized version of this object from data. + /// + /// \param data + /// The decoder object that references the serialized data. + /// + /// \param offset_ptr + /// A pointer that contains the offset from which the data will be decoded + /// from that gets updated as data gets decoded. + /// + /// \param section_list + /// A section list that allows lldb_private::Address objects to be filled + /// in. 
The address information for symbols is serialized as file addresses + /// and must be converted into Address objects with the right section and + /// offset. + /// + /// \param strtab + /// All strings in cache files are put into string tables for efficiency + /// and cache file size reduction. Strings are stored as uint32_t string + /// table offsets in the cache data. + /// + /// \return + /// True if the symbol is successfully decoded, false otherwise. + bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + const SectionList *section_list, const StringTableReader &strtab); + + /// Encode this object into a data encoder object. + /// + /// This allows this object to be serialized to disk. + /// + /// \param encoder + /// A data encoder object that serialized bytes will be encoded into. + /// + /// \param strtab + /// All strings in cache files are put into string tables for efficiency + /// and cache file size reduction. Strings are stored as uint32_t string + /// table offsets in the cache data. + void Encode(DataEncoder &encoder, ConstStringTable &strtab) const; + + bool operator==(const Symbol &rhs) const; + protected: // This is the internal guts of ResolveReExportedSymbol, it assumes // reexport_name is not null, and that module_spec is valid. We track the diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h --- a/lldb/include/lldb/Symbol/Symtab.h +++ b/lldb/include/lldb/Symbol/Symtab.h @@ -131,7 +131,86 @@ bool add_demangled, bool add_mangled, NameToIndexMap &name_to_index_map) const; - ObjectFile *GetObjectFile() { return m_objfile; } + ObjectFile *GetObjectFile() const { return m_objfile; } + + /// Decode a serialized version of this object from data. + /// + /// \param data + /// The decoder object that references the serialized data. + /// + /// \param offset_ptr + /// A pointer that contains the offset from which the data will be decoded + /// from that gets updated as data gets decoded. 
+ /// + /// \param[out] uuid_mismatch + /// Set to true if a cache file exists but the UUID didn't match, false + /// otherwise. + /// + /// \return + /// True if the symbol table is successfully decoded and can be used, + /// false otherwise. + bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + bool &uuid_mismatch); + + /// Encode this object into a data encoder object. + /// + /// This allows this object to be serialized to disk. The object file must + /// have a valid Signature in order to be serialized as it is used to make + /// sure the cached information matches when cached data is loaded at a later + /// time. If the object file doesn't have a valid signature false will be + /// returned and it will indicate we should not cache this data. + /// + /// \param encoder + /// A data encoder object that serialized bytes will be encoded into. + /// + /// \return + /// True if the symbol table's object file can generate a valid signature + /// and all data for the symbol table was encoded, false otherwise. + bool Encode(DataEncoder &encoder) const; + + /// Get the cache key string for this symbol table. + /// + /// The cache key must start with the module's cache key and is followed + /// by information that indicates this key is for caching the symbol table + /// contents and should also include the hash of the object file. A module can + /// be represented by an ObjectFile object for the main executable, but can + /// also have a symbol file that is from the same or a different object file. + /// This means we might have two symbol tables cached in the index cache, one + /// for the main executable and one for the symbol file. + /// + /// \return + /// The unique cache key used to save and retrieve data from the index cache. + std::string GetCacheKey(); + + /// Save the symbol table data out into a cache. + /// + /// The symbol table will only be saved to a cache file if caching is enabled. 
+ /// + /// We cache the contents of the symbol table since symbol tables in LLDB take + /// some time to initialize. This is due to the many sources for data that are + /// used to create a symbol table: + /// - standard symbol table + /// - dynamic symbol table (ELF) + /// - compressed debug info sections + /// - unwind information + /// - function pointers found in runtimes for global constructor/destructors + /// - other sources. + /// All of the above sources are combined and one symbol table results after + /// all sources have been considered. + void SaveToCache(); + + /// Load the symbol table from the index cache. + /// + /// Quickly load the finalized symbol table from the index cache. This saves + /// time when the debugger starts up. The index cache file for the symbol + /// table has the modification time set to the same time as the main module. + /// If the cache file exists and the modification times match, we will load + /// the symbol table from the serialized cache file. + /// + /// \return + /// True if the symbol table was successfully loaded from the index cache, + /// false if the symbol table wasn't cached or was out of date. + bool LoadFromCache(); protected: typedef std::vector collection; diff --git a/lldb/include/lldb/Utility/DataEncoder.h b/lldb/include/lldb/Utility/DataEncoder.h --- a/lldb/include/lldb/Utility/DataEncoder.h +++ b/lldb/include/lldb/Utility/DataEncoder.h @@ -158,6 +158,14 @@ /// A string reference that contains bytes to append. void AppendData(llvm::StringRef data); + /// Append bytes to the end of the owned data. + /// + /// Append the bytes contained in the array reference. + /// + /// \param data + /// An array reference that contains bytes to append. + void AppendData(llvm::ArrayRef data); + /// Append a C string to the end of the owned data. /// /// Append the bytes contained in the string reference along with an extra @@ -243,6 +251,17 @@ /// A array reference to the data that this object references. 
llvm::ArrayRef GetData() const; + /// Get the number of bytes contained in this object. + /// + /// \return + /// The total number of bytes of data this object refers to. + size_t GetByteSize() const; + + lldb::ByteOrder GetByteOrder() const { return m_byte_order; } + + /// The address size to use when encoding pointers or addresses. + uint8_t GetAddressByteSize() const { return m_addr_size; } + private: uint32_t BytesLeft(uint32_t offset) const { const uint32_t size = GetByteSize(); @@ -267,20 +286,14 @@ /// object, \b false otherwise. bool ValidOffset(uint32_t offset) const { return offset < GetByteSize(); } - /// Get the number of bytes contained in this object. - /// - /// \return - /// The total number of bytes of data this object refers to. - size_t GetByteSize() const; - /// The shared pointer to data that can grow as data is added std::shared_ptr m_data_sp; /// The byte order of the data we are encoding to. - lldb::ByteOrder m_byte_order; + const lldb::ByteOrder m_byte_order; /// The address size to use when encoding pointers or addresses. 
- uint8_t m_addr_size; + const uint8_t m_addr_size; DataEncoder(const DataEncoder &) = delete; const DataEncoder &operator=(const DataEncoder &) = delete; diff --git a/lldb/include/lldb/lldb-forward.h b/lldb/include/lldb/lldb-forward.h --- a/lldb/include/lldb/lldb-forward.h +++ b/lldb/include/lldb/lldb-forward.h @@ -46,6 +46,7 @@ class Broadcaster; class BroadcasterManager; class CXXSyntheticChildren; +struct CacheSignature; class CallFrameInfo; class CommandInterpreter; class CommandInterpreterRunOptions; @@ -61,6 +62,7 @@ class Connection; class ConnectionFileDescriptor; class ConstString; +class ConstStringTable; class DWARFCallFrameInfo; class DWARFDataExtractor; class DWARFExpression; @@ -68,6 +70,7 @@ class DataBufferHeap; class DataEncoder; class DataExtractor; +class DataFileCache; class Debugger; class Declaration; class DiagnosticManager; @@ -199,6 +202,7 @@ class StreamFile; class StreamString; class StringList; +class StringTableReader; class StructuredDataImpl; class StructuredDataPlugin; class Symbol; diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -2515,7 +2515,8 @@ self.fail(self._formatMessage(msg, "'{}' is not success".format(error))) - def createTestTarget(self, file_path=None, msg=None): + def createTestTarget(self, file_path=None, msg=None, + load_dependent_modules=True): """ Creates a target from the file found at the given file path. Asserts that the resulting target is valid. 
@@ -2529,7 +2530,6 @@ error = lldb.SBError() triple = "" platform = "" - load_dependent_modules = True target = self.dbg.CreateTarget(file_path, triple, platform, load_dependent_modules, error) if error.Fail(): diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -25,6 +25,7 @@ AddressResolver.cpp AddressResolverFileLine.cpp Communication.cpp + DataFileCache.cpp Debugger.cpp Declaration.cpp Disassembler.cpp diff --git a/lldb/source/Core/CoreProperties.td b/lldb/source/Core/CoreProperties.td --- a/lldb/source/Core/CoreProperties.td +++ b/lldb/source/Core/CoreProperties.td @@ -13,6 +13,26 @@ Global, DefaultStringValue<"">, Desc<"Debug info path which should be resolved while parsing, relative to the host filesystem.">; + def EnableLLDBIndexCache: Property<"enable-lldb-index-cache", "Boolean">, + Global, + DefaultFalse, + Desc<"Enable caching for debug sessions in LLDB. LLDB can cache data for each module for improved performance in subsequent debug sessions.">; + def LLDBIndexCachePath: Property<"lldb-index-cache-path", "FileSpec">, + Global, + DefaultStringValue<"">, + Desc<"The path to the LLDB index cache directory.">; + def LLDBIndexCacheMaxByteSize: Property<"lldb-index-cache-max-byte-size", "UInt64">, + Global, + DefaultUnsignedValue<0>, + Desc<"The maximum size for the LLDB index cache directory in bytes. A value over the amount of available space on the disk will be reduced to the amount of available space. A value of 0 disables the absolute size-based pruning.">; + def LLDBIndexCacheMaxPercent: Property<"lldb-index-cache-max-percent", "UInt64">, + Global, + DefaultUnsignedValue<0>, + Desc<"The maximum size for the cache directory in terms of percentage of the available space on the disk. Set to 100 to indicate no limit, 50 to indicate that the cache size will not be left over half the available disk space. A value over 100 will be reduced to 100. 
A value of 0 disables the percentage size-based pruning.">;
+  def LLDBIndexCacheExpirationDays: Property<"lldb-index-cache-expiration-days", "UInt64">,
+    Global,
+    DefaultUnsignedValue<7>,
+    Desc<"The expiration time in days for a file. When a file hasn't been accessed for the specified amount of days, it is removed from the cache. A value of 0 disables the expiration-based pruning.">;
 }

 let Definition = "debugger" in {
diff --git a/lldb/source/Core/DataFileCache.cpp b/lldb/source/Core/DataFileCache.cpp
new file mode 100644
--- /dev/null
+++ b/lldb/source/Core/DataFileCache.cpp
@@ -0,0 +1,308 @@
+//===-- DataFileCache.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Core/DataFileCache.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ModuleList.h"
+#include "lldb/Host/FileSystem.h"
+#include "lldb/Symbol/ObjectFile.h"
+#include "lldb/Utility/DataEncoder.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+#include "llvm/Support/CachePruning.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace lldb_private;
+
+DataFileCache::DataFileCache(StringRef path) {
+  m_cache_dir.SetPath(path);
+
+  // Prune the cache based off of the LLDB settings each time we create a cache
+  // object.
+  ModuleListProperties &properties =
+      ModuleList::GetGlobalModuleListProperties();
+  CachePruningPolicy policy;
+  // Only scan once an hour. If we have lots of debug sessions we don't want
+  // to scan this directory too often. A timestamp file is written to the
+  // directory to ensure different processes don't scan the directory too often.
+  // This setting doesn't mean that a thread will continually scan the cache
+  // directory within this process.
+  policy.Interval = std::chrono::hours(1);
+  // Get the user settings for pruning.
+  policy.MaxSizeBytes = properties.GetLLDBIndexCacheMaxByteSize();
+  policy.MaxSizePercentageOfAvailableSpace =
+      properties.GetLLDBIndexCacheMaxPercent();
+  policy.Expiration =
+      std::chrono::hours(properties.GetLLDBIndexCacheExpirationDays() * 24);
+  pruneCache(path, policy);
+
+  // This lambda will get called when the data is gotten from the cache and
+  // also after the data was set for a given key. We only need to take
+  // ownership of the data if we are getting the data, so we use the
+  // m_take_ownership member variable to indicate if we need to take
+  // ownership.
+
+  auto add_buffer = [this](unsigned task,
+                           std::unique_ptr<llvm::MemoryBuffer> m) {
+    if (m_take_ownership)
+      m_mem_buff_up = std::move(m);
+  };
+  Expected<FileCache> cache_or_err =
+      llvm::localCache("LLDBModuleCache", "lldb-module", path, add_buffer);
+  if (cache_or_err)
+    m_cache_callback = std::move(*cache_or_err);
+  else {
+    // m_cache_callback stays empty in this case, so anything that uses it
+    // must check it first.
+    Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES));
+    LLDB_LOG_ERROR(log, cache_or_err.takeError(),
+                   "failed to create lldb index cache directory: {0}");
+  }
+}
+
+std::unique_ptr<llvm::MemoryBuffer>
+DataFileCache::GetCachedData(StringRef key) {
+  std::lock_guard<std::mutex> guard(m_mutex);
+
+  // The constructor leaves m_cache_callback empty, after logging the error,
+  // when the cache could not be created. An empty callback must not be
+  // invoked, so report the data as not being cached.
+  if (!m_cache_callback)
+    return std::unique_ptr<llvm::MemoryBuffer>();
+
+  const unsigned task = 1;
+  m_take_ownership = true;
+  // If we call the "m_cache_callback" function and the data is cached, it will
+  // call the "add_buffer" lambda function from the constructor which will in
+  // turn take ownership of the member buffer that is passed to the callback and
+  // put it into a member variable.
+  Expected<AddStreamFn> add_stream_or_err = m_cache_callback(task, key);
+  m_take_ownership = false;
+  // At this point we either already called the "add_buffer" lambda with
+  // the data or we haven't. We can tell if we got the cached data by checking
+  // the add_stream function pointer value below.
+  if (add_stream_or_err) {
+    AddStreamFn &add_stream = *add_stream_or_err;
+    // If the "add_stream" is nullptr, then the data was cached and we already
+    // called the "add_buffer" lambda. If it is valid, then if we were to call
+    // the add_stream function it would cause a cache file to get generated
+    // and we would be expected to fill in the data. In this function we only
+    // want to check if the data was cached, so we don't want to call
+    // "add_stream" in this function.
+    if (!add_stream)
+      return std::move(m_mem_buff_up);
+  } else {
+    Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES));
+    LLDB_LOG_ERROR(log, add_stream_or_err.takeError(),
+                   "failed to get the cache add stream callback for key: {0}");
+  }
+  // Data was not cached.
+  return std::unique_ptr<llvm::MemoryBuffer>();
+}
+
+bool DataFileCache::SetCachedData(StringRef key,
+                                  llvm::ArrayRef<uint8_t> data) {
+  std::lock_guard<std::mutex> guard(m_mutex);
+  // The constructor leaves m_cache_callback empty, after logging the error,
+  // when the cache could not be created. An empty callback must not be
+  // invoked, so report that nothing was cached.
+  if (!m_cache_callback)
+    return false;
+  const unsigned task = 2;
+  // If we call this function and the data is cached, it will call the
+  // add_buffer lambda function from the constructor which will ignore the
+  // data.
+  Expected<AddStreamFn> add_stream_or_err = m_cache_callback(task, key);
+  // If we reach this code then we either already called the callback with
+  // the data or we haven't. We can tell if we had the cached data by checking
+  // the add_stream function pointer value below.
+  if (add_stream_or_err) {
+    AddStreamFn &add_stream = *add_stream_or_err;
+    // If the "add_stream" is nullptr, then the data was already cached and
+    // there is nothing to write. If it is valid, calling the add_stream
+    // function with the task gives us the stream to write the data into,
+    // which causes the cache file to get generated. Note that the
+    // add_buffer will also get called in this case after the data has been
+    // provided, but we won't take ownership of the memory buffer as we just
+    // want to write the data.
+    if (add_stream) {
+      Expected<std::unique_ptr<CachedFileStream>> file_or_err =
+          add_stream(task);
+      if (file_or_err) {
+        CachedFileStream *cfs = file_or_err->get();
+        cfs->OS->write(reinterpret_cast<const char *>(data.data()),
+                       data.size());
+        return true;
+      } else {
+        Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES));
+        LLDB_LOG_ERROR(log, file_or_err.takeError(),
+                       "failed to get the cache file stream for key: {0}");
+      }
+    }
+  } else {
+    Log *log(lldb_private::GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES));
+    LLDB_LOG_ERROR(log, add_stream_or_err.takeError(),
+                   "failed to get the cache add stream callback for key: {0}");
+  }
+  return false;
+}
+
+FileSpec DataFileCache::GetCacheFilePath(llvm::StringRef key) {
+  // The cache file is named "llvmcache-" followed by the key and lives
+  // directly inside the cache directory.
+  FileSpec cache_file(m_cache_dir);
+  std::string filename("llvmcache-");
+  filename += key.str();
+  cache_file.AppendPathComponent(filename);
+  return cache_file;
+}
+
+Status DataFileCache::RemoveCacheFile(llvm::StringRef key) {
+  FileSpec cache_file = GetCacheFilePath(key);
+  FileSystem &fs = FileSystem::Instance();
+  // A cache file that doesn't exist is not an error, there is just nothing to
+  // remove.
+  if (!fs.Exists(cache_file))
+    return Status();
+  return fs.RemoveFile(cache_file);
+}
+
+CacheSignature::CacheSignature(lldb_private::Module *module) {
+  Clear();
+  UUID uuid = module->GetUUID();
+  if (uuid.IsValid())
+    m_uuid = uuid;
+
+  // A modification time of zero is treated as "no value" and is left out of
+  // the signature.
+  std::time_t mod_time = 0;
+  mod_time = llvm::sys::toTimeT(module->GetModificationTime());
+  if (mod_time != 0)
+    m_mod_time = mod_time;
+
+  mod_time = llvm::sys::toTimeT(module->GetObjectModificationTime());
+  if (mod_time != 0)
+    m_obj_mod_time = mod_time;
+}
+
+CacheSignature::CacheSignature(lldb_private::ObjectFile *objfile) {
+  Clear();
+  UUID uuid = objfile->GetUUID();
+  if (uuid.IsValid())
+    m_uuid = uuid;
+
+  std::time_t mod_time = 0;
+  // Grab the modification time of the object file's file. It isn't always the
+  // same as the module's file when you have an executable file as the main
+  // executable, and you have an object file for a symbol file.
+ FileSystem &fs = FileSystem::Instance(); + mod_time = llvm::sys::toTimeT(fs.GetModificationTime(objfile->GetFileSpec())); + if (mod_time != 0) + m_mod_time = mod_time; + + mod_time = + llvm::sys::toTimeT(objfile->GetModule()->GetObjectModificationTime()); + if (mod_time != 0) + m_obj_mod_time = mod_time; +} + +enum SignatureEncoding { + eSignatureUUID = 1u, + eSignatureModTime = 2u, + eSignatureObjectModTime = 3u, + eSignatureEnd = 255u, +}; + +bool CacheSignature::Encode(DataEncoder &encoder) { + if (!IsValid()) + return false; // Invalid signature, return false! + + if (m_uuid.hasValue()) { + llvm::ArrayRef uuid_bytes = m_uuid->GetBytes(); + encoder.AppendU8(eSignatureUUID); + encoder.AppendU8(uuid_bytes.size()); + encoder.AppendData(uuid_bytes); + } + if (m_mod_time.hasValue()) { + encoder.AppendU8(eSignatureModTime); + encoder.AppendU32(*m_mod_time); + } + if (m_obj_mod_time.hasValue()) { + encoder.AppendU8(eSignatureObjectModTime); + encoder.AppendU32(*m_obj_mod_time); + } + encoder.AppendU8(eSignatureEnd); + return true; +} + +bool CacheSignature::Decode(const DataExtractor &data, + lldb::offset_t *offset_ptr) { + Clear(); + while (uint8_t sig_encoding = data.GetU8(offset_ptr)) { + switch (sig_encoding) { + case eSignatureUUID: { + const uint8_t length = data.GetU8(offset_ptr); + const uint8_t *bytes = (const uint8_t *)data.GetData(offset_ptr, length); + if (bytes != nullptr && length > 0) + m_uuid = UUID::fromData(llvm::ArrayRef(bytes, length)); + } break; + case eSignatureModTime: { + uint32_t mod_time = data.GetU32(offset_ptr); + if (mod_time > 0) + m_mod_time = mod_time; + } break; + case eSignatureObjectModTime: { + uint32_t mod_time = data.GetU32(offset_ptr); + if (mod_time > 0) + m_mod_time = mod_time; + } break; + case eSignatureEnd: + return true; + default: + break; + } + } + return false; +} + +uint32_t ConstStringTable::Add(ConstString s) { + auto pos = m_string_to_offset.find(s); + if (pos != m_string_to_offset.end()) + return pos->second; + 
const uint32_t offset = m_next_offset; + m_strings.push_back(s); + m_string_to_offset[s] = offset; + m_next_offset += s.GetLength() + 1; + return offset; +} + +static const llvm::StringRef kStringTableIdentifier("STAB"); + +bool ConstStringTable::Encode(DataEncoder &encoder) { + // Write a 4 character code into the stream. This will help us when decoding + // to make sure we find this identifier when decoding the string table to make + // sure we have the right data. It also helps to identify the string table + // when dumping the hex bytes in a cache file. + encoder.AppendData(kStringTableIdentifier); + size_t length_offset = encoder.GetByteSize(); + encoder.AppendU32(0); // Total length of all strings which will be fixed up. + size_t strtab_offset = encoder.GetByteSize(); + encoder.AppendU8(0); // Start the string table with an empty string. + for (auto s: m_strings) { + // Make sure all of the offsets match up with what we handed out! + const size_t stroff = encoder.GetByteSize() - strtab_offset; + auto pos = m_string_to_offset.find(s); + assert(pos->second == stroff); + // Append the C string into the encoder + encoder.AppendCString(s.GetStringRef()); + } + // Fixup the string table length. + encoder.PutU32(length_offset, encoder.GetByteSize() - strtab_offset); + return true; +} + +bool StringTableReader::Decode(const DataExtractor &data, + lldb::offset_t *offset_ptr) { + llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); + if (identifier != kStringTableIdentifier) + return false; + const uint32_t length = data.GetU32(offset_ptr); + // We always have at least one byte for the empty string at offset zero.
+ if (length == 0) + return false; + const char *bytes = (const char *)data.GetData(offset_ptr, length); + if (bytes == nullptr) + return false; + m_data = StringRef(bytes, length); + return true; +} + +StringRef StringTableReader::Get(uint32_t offset) const { + if (offset >= m_data.size()) + return StringRef(); + return StringRef(m_data.data() + offset); +} diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp --- a/lldb/source/Core/Mangled.cpp +++ b/lldb/source/Core/Mangled.cpp @@ -8,9 +8,11 @@ #include "lldb/Core/Mangled.h" +#include "lldb/Core/DataFileCache.h" #include "lldb/Core/RichManglingContext.h" #include "lldb/Target/Language.h" #include "lldb/Utility/ConstString.h" +#include "lldb/Utility/DataEncoder.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Logging.h" #include "lldb/Utility/RegularExpression.h" @@ -411,3 +413,111 @@ s << ", demangled = "; return s; } + +// When encoding Mangled objects we can get away with encoding as little +// information as is required. The enumeration below helps us to efficiently +// encode Mangled objects. +enum MangledEncoding { + /// If the Mangled object has neither a mangled name or demangled name we can + /// encode the object with one zero byte using the Empty enumeration. + Empty = 0u, + /// If the Mangled object has only a demangled name and no mangled named, we + /// can encode only the demangled name. + DemangledOnly = 1u, + /// If the mangle name can calculate the demangled name (it is the + /// mangled/demangled counterpart), then we only need to encode the mangled + /// name as the demangled name can be recomputed. + MangledOnly = 2u, + /// If we have a Mangled object with two different names that are not related + /// then we need to save both strings. This can happen if we have a name that + /// isn't a true mangled name, but we want to be able to lookup a symbol by + /// name and type in the symbol table. 
We do this for Objective C symbols like + /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to + /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to + /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it + /// would fail, but in these cases we want these unrelated names to be + /// preserved. + MangledAndDemangled = 3u +}; + +bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + const StringTableReader &strtab) { + m_mangled.Clear(); + m_demangled.Clear(); + MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr); + switch (encoding) { + case Empty: + return true; + + case DemangledOnly: + m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); + return true; + + case MangledOnly: + m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); + return true; + + case MangledAndDemangled: + m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr))); + m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr))); + return true; + } + return false; +} +/// The encoding format for the Mangled object is as follows: +/// +/// uint8_t encoding; +/// uint32_t str1; (only if DemangledOnly, MangledOnly, MangledAndDemangled) +/// uint32_t str2; (only if MangledAndDemangled) +/// +/// str1 and str2 are 32 bit offsets into the string table that holds the +/// actual strings, and they are only saved if we need them based on the +/// encoding. For MangledAndDemangled, str1 is the mangled name and str2 is +/// the demangled name. +/// +/// Some mangled names have a mangled name that can be demangled by the built +/// in demanglers. These kinds of mangled objects know when the mangled and +/// demangled names are the counterparts for each other. This is done because +/// demangling is very expensive and avoiding demangling the same name twice +/// saves us a lot of compute time. For these kinds of names we only need to +/// save the mangled name and have the encoding set to "MangledOnly". +/// +/// If a mangled object has only a demangled name, then we save only that string +/// and have the encoding set to "DemangledOnly".
+/// +/// Some mangled objects have both mangled and demangled names, but the +/// demangled name can not be computed from the mangled name. This is often used +/// for runtime named, like Objective C runtime V2 and V3 names. Both these +/// names must be saved and the encoding is set to "MangledAndDemangled". +/// +/// For a Mangled object with no names, we only need to set the encoding to +/// "Empty" and not store any string values. +void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const { + MangledEncoding encoding = Empty; + if (m_mangled) { + encoding = MangledOnly; + if (m_demangled) { + // We have both mangled and demangled names. If the demangled name is the + // counterpart of the mangled name, then we only need to save the mangled + // named. If they are different, we need to save both. + ConstString s; + if (!(m_mangled.GetMangledCounterpart(s) && s == m_demangled)) + encoding = MangledAndDemangled; + } + } else if (m_demangled) { + encoding = DemangledOnly; + } + file.AppendU8(encoding); + switch (encoding) { + case Empty: + break; + case DemangledOnly: + file.AppendU32(strtab.Add(m_demangled)); + break; + case MangledOnly: + file.AppendU32(strtab.Add(m_mangled)); + break; + case MangledAndDemangled: + file.AppendU32(strtab.Add(m_mangled)); + file.AppendU32(strtab.Add(m_demangled)); + break; + } +} diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -10,6 +10,7 @@ #include "lldb/Core/AddressRange.h" #include "lldb/Core/AddressResolverFileLine.h" +#include "lldb/Core/DataFileCache.h" #include "lldb/Core/Debugger.h" #include "lldb/Core/FileSpecList.h" #include "lldb/Core/Mangled.h" @@ -55,7 +56,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/DJB.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/Signals.h" #include 
"llvm/Support/raw_ostream.h" @@ -1656,3 +1660,36 @@ return false; } + +uint32_t Module::Hash() { + std::string identifier; + llvm::raw_string_ostream id_strm(identifier); + id_strm << m_arch.GetTriple().str() << '-' << m_file.GetPath(); + if (m_object_name) + id_strm << '(' << m_object_name.GetStringRef() << ')'; + if (m_object_offset > 0) + id_strm << m_object_offset; + const auto mtime = llvm::sys::toTimeT(m_object_mod_time); + if (mtime > 0) + id_strm << mtime; + return llvm::djbHash(id_strm.str()); +} + +std::string Module::GetCacheKey() { + std::string key; + llvm::raw_string_ostream strm(key); + strm << m_arch.GetTriple().str() << '-' << m_file.GetFilename(); + if (m_object_name) + strm << '(' << m_object_name.GetStringRef() << ')'; + strm << '-' << llvm::format_hex(Hash(), 10); + return strm.str(); +} + +DataFileCache *Module::GetIndexCache() { + if (!ModuleList::GetGlobalModuleListProperties().GetEnableLLDBIndexCache()) + return nullptr; + // NOTE: intentional leak so we don't crash if global destructor chain gets + // called as other threads still use the result of this function + static DataFileCache *g_data_file_cache = new DataFileCache(ModuleList::GetGlobalModuleListProperties().GetLLDBIndexCachePath().GetPath()); + return g_data_file_cache; +} diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -85,6 +85,14 @@ if (clang::driver::Driver::getDefaultModuleCachePath(path)) { lldbassert(SetClangModulesCachePath(FileSpec(path))); } + + path.clear(); + if (llvm::sys::path::cache_directory(path)) { + llvm::sys::path::append(path, "lldb"); + llvm::sys::path::append(path, "IndexCache"); + lldbassert(SetLLDBIndexCachePath(FileSpec(path))); + } + } bool ModuleListProperties::GetEnableExternalLookup() const { @@ -110,6 +118,47 @@ nullptr, ePropertyClangModulesCachePath, path); } +FileSpec ModuleListProperties::GetLLDBIndexCachePath() const { + return 
m_collection_sp + ->GetPropertyAtIndexAsOptionValueFileSpec(nullptr, false, + ePropertyLLDBIndexCachePath) + ->GetCurrentValue(); +} + +bool ModuleListProperties::SetLLDBIndexCachePath(const FileSpec &path) { + return m_collection_sp->SetPropertyAtIndexAsFileSpec( + nullptr, ePropertyLLDBIndexCachePath, path); +} + +bool ModuleListProperties::GetEnableLLDBIndexCache() const { + const uint32_t idx = ePropertyEnableLLDBIndexCache; + return m_collection_sp->GetPropertyAtIndexAsBoolean( + nullptr, idx, g_modulelist_properties[idx].default_uint_value != 0); +} + +bool ModuleListProperties::SetEnableLLDBIndexCache(bool new_value) { + return m_collection_sp->SetPropertyAtIndexAsBoolean( + nullptr, ePropertyEnableLLDBIndexCache, new_value); +} + +uint64_t ModuleListProperties::GetLLDBIndexCacheMaxByteSize() { + const uint32_t idx = ePropertyLLDBIndexCacheMaxByteSize; + return m_collection_sp->GetPropertyAtIndexAsUInt64( + nullptr, idx, g_modulelist_properties[idx].default_uint_value); +} + +uint64_t ModuleListProperties::GetLLDBIndexCacheMaxPercent() { + const uint32_t idx = ePropertyLLDBIndexCacheMaxPercent; + return m_collection_sp->GetPropertyAtIndexAsUInt64( + nullptr, idx, g_modulelist_properties[idx].default_uint_value); +} + +uint64_t ModuleListProperties::GetLLDBIndexCacheExpirationDays() { + const uint32_t idx = ePropertyLLDBIndexCacheExpirationDays; + return m_collection_sp->GetPropertyAtIndexAsUInt64( + nullptr, idx, g_modulelist_properties[idx].default_uint_value); +} + void ModuleListProperties::UpdateSymlinkMappings() { FileSpecList list = m_collection_sp ->GetPropertyAtIndexAsOptionValueFileSpecList( diff --git a/lldb/source/Host/common/FileSystem.cpp b/lldb/source/Host/common/FileSystem.cpp --- a/lldb/source/Host/common/FileSystem.cpp +++ b/lldb/source/Host/common/FileSystem.cpp @@ -513,3 +513,11 @@ void FileSystem::SetHomeDirectory(std::string home_directory) { m_home_directory = std::move(home_directory); } + +Status FileSystem::RemoveFile(const FileSpec 
&file_spec) { + return RemoveFile(file_spec.GetPath()); +} + +Status FileSystem::RemoveFile(const llvm::Twine &path) { + return Status(llvm::sys::fs::remove(path)); +} diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -23,6 +23,8 @@ #include "lldb/Utility/Timer.h" #include "lldb/lldb-private.h" +#include "llvm/Support/DJB.h" + using namespace lldb; using namespace lldb_private; @@ -737,13 +739,24 @@ // not be able to access the symbol table contents since all APIs in Symtab // are protected by a mutex in the Symtab object itself. llvm::call_once(*m_symtab_once_up, [&]() { - ElapsedTime elapsed(module_sp->GetSymtabParseTime()); - Symtab *symtab = new Symtab(this); - std::lock_guard symtab_guard(symtab->GetMutex()); - m_symtab_up.reset(symtab); - ParseSymtab(*m_symtab_up); - m_symtab_up->Finalize(); + Symtab *symtab = new Symtab(this); + std::lock_guard symtab_guard(symtab->GetMutex()); + m_symtab_up.reset(symtab); + if (!m_symtab_up->LoadFromCache()) { + ElapsedTime elapsed(module_sp->GetSymtabParseTime()); + ParseSymtab(*m_symtab_up); + m_symtab_up->Finalize(); + } }); } return m_symtab_up.get(); } + +uint32_t ObjectFile::GetCacheHash() { + if (m_cache_hash) + return *m_cache_hash; + StreamString strm; + strm.Format("{0}-{1}-{2}", m_file, GetType(), GetStrata()); + m_cache_hash = llvm::djbHash(strm.GetString()); + return *m_cache_hash; +} diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp --- a/lldb/source/Symbol/Symbol.cpp +++ b/lldb/source/Symbol/Symbol.cpp @@ -17,6 +17,7 @@ #include "lldb/Symbol/Symtab.h" #include "lldb/Target/Process.h" #include "lldb/Target/Target.h" +#include "lldb/Utility/DataEncoder.h" #include "lldb/Utility/Stream.h" using namespace lldb; @@ -595,3 +596,131 @@ m_mangled.SetDemangledName(ConstString(os.str())); } } + +bool Symbol::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + const 
SectionList *section_list, + const StringTableReader &strtab) { + // The fixed-size header is m_uid (4) + m_type_data (2) + bitfields (2). + if (!data.ValidOffsetForDataOfSize(*offset_ptr, 8)) + return false; + m_uid = data.GetU32(offset_ptr); + m_type_data = data.GetU16(offset_ptr); + const uint16_t bitfields = data.GetU16(offset_ptr); + m_type_data_resolved = (1u << 15 & bitfields) != 0; + m_is_synthetic = (1u << 14 & bitfields) != 0; + m_is_debug = (1u << 13 & bitfields) != 0; + m_is_external = (1u << 12 & bitfields) != 0; + m_size_is_sibling = (1u << 11 & bitfields) != 0; + m_size_is_synthesized = (1u << 10 & bitfields) != 0; + m_size_is_valid = (1u << 9 & bitfields) != 0; + m_demangled_is_synthesized = (1u << 8 & bitfields) != 0; + m_contains_linker_annotations = (1u << 7 & bitfields) != 0; + m_is_weak = (1u << 6 & bitfields) != 0; + m_type = bitfields & 0x003f; + if (!m_mangled.Decode(data, offset_ptr, strtab)) + return false; + // The trailer is is_addr (1) + value (8) + size (8) + flags (4) = 21 bytes. + if (!data.ValidOffsetForDataOfSize(*offset_ptr, 21)) + return false; + const bool is_addr = data.GetU8(offset_ptr) != 0; + const uint64_t value = data.GetU64(offset_ptr); + if (is_addr) { + m_addr_range.GetBaseAddress().ResolveAddressUsingFileSections( + value, section_list); + } else { + m_addr_range.GetBaseAddress().Clear(); + m_addr_range.GetBaseAddress().SetOffset(value); + } + m_addr_range.SetByteSize(data.GetU64(offset_ptr)); + m_flags = data.GetU32(offset_ptr); + return true; +} + +/// The encoding format for the symbol is as follows: +/// +/// uint32_t m_uid; +/// uint16_t m_type_data; +/// uint16_t bitfield_data; +/// Mangled mangled; +/// uint8_t is_addr; +/// uint64_t file_addr_or_value; +/// uint64_t size; +/// uint32_t flags; +/// +/// The only tricky thing in this encoding is encoding all of the bits in the +/// bitfields. We use a trick to store all bitfields as a 16 bit value and we +/// do the same thing when decoding the symbol. There are tests that ensure this +/// encoding works for each individual bit. Everything else is very easy to +/// store.
+void Symbol::Encode(DataEncoder &file, ConstStringTable &strtab) const { + file.AppendU32(m_uid); + file.AppendU16(m_type_data); + uint16_t bitfields = m_type; + if (m_type_data_resolved) + bitfields |= 1u << 15; + if (m_is_synthetic) + bitfields |= 1u << 14; + if (m_is_debug) + bitfields |= 1u << 13; + if (m_is_external) + bitfields |= 1u << 12; + if (m_size_is_sibling) + bitfields |= 1u << 11; + if (m_size_is_synthesized) + bitfields |= 1u << 10; + if (m_size_is_valid) + bitfields |= 1u << 9; + if (m_demangled_is_synthesized) + bitfields |= 1u << 8; + if (m_contains_linker_annotations) + bitfields |= 1u << 7; + if (m_is_weak) + bitfields |= 1u << 6; + file.AppendU16(bitfields); + m_mangled.Encode(file, strtab); + // A symbol's value might be an address, or it might be a constant. If the + // symbol's base address doesn't have a section, then it is a constant value. + // If it does have a section, we will encode the file address and re-resolve + // the address when we decode it. + bool is_addr = m_addr_range.GetBaseAddress().GetSection().get() != NULL; + file.AppendU8(is_addr); + file.AppendU64(m_addr_range.GetBaseAddress().GetFileAddress()); + file.AppendU64(m_addr_range.GetByteSize()); + file.AppendU32(m_flags); +} + +bool Symbol::operator==(const Symbol &rhs) const { + if (m_uid != rhs.m_uid) + return false; + if (m_type_data != rhs.m_type_data) + return false; + if (m_type_data_resolved != rhs.m_type_data_resolved) + return false; + if (m_is_synthetic != rhs.m_is_synthetic) + return false; + if (m_is_debug != rhs.m_is_debug) + return false; + if (m_is_external != rhs.m_is_external) + return false; + if (m_size_is_sibling != rhs.m_size_is_sibling) + return false; + if (m_size_is_synthesized != rhs.m_size_is_synthesized) + return false; + if (m_size_is_valid != rhs.m_size_is_valid) + return false; + if (m_demangled_is_synthesized != rhs.m_demangled_is_synthesized) + return false; + if (m_contains_linker_annotations != rhs.m_contains_linker_annotations) + return 
false; + if (m_is_weak != rhs.m_is_weak) + return false; + if (m_type != rhs.m_type) + return false; + if (m_mangled != rhs.m_mangled) + return false; + if (m_addr_range.GetBaseAddress() != rhs.m_addr_range.GetBaseAddress()) + return false; + if (m_addr_range.GetByteSize() != rhs.m_addr_range.GetByteSize()) + return false; + if (m_flags != rhs.m_flags) + return false; + return true; +} diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -9,6 +9,7 @@ #include #include +#include "lldb/Core/DataFileCache.h" #include "lldb/Core/Module.h" #include "lldb/Core/RichManglingContext.h" #include "lldb/Core/Section.h" @@ -17,11 +18,15 @@ #include "lldb/Symbol/SymbolContext.h" #include "lldb/Symbol/Symtab.h" #include "lldb/Target/Language.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/Endian.h" #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/Timer.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DJB.h" using namespace lldb; using namespace lldb_private; @@ -109,7 +114,8 @@ s->Indent(); pos->Dump(s, target, std::distance(begin, pos), name_preference); } - } break; + } + break; case eSortOrderByName: { // Although we maintain a lookup by exact name map, the table isn't @@ -1004,6 +1010,7 @@ collection new_symbols(m_symbols.begin(), m_symbols.end()); m_symbols.swap(new_symbols); } + SaveToCache(); } Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) { @@ -1150,3 +1157,191 @@ } return nullptr; } + +std::string Symtab::GetCacheKey() { + std::string key; + llvm::raw_string_ostream strm(key); + // Symbol table can come from different object files for the same module. A + // module can have one object file as the main executable and might have + // another object file in a separate symbol file. 
+ strm << m_objfile->GetModule()->GetCacheKey() << "-symtab-" + << llvm::format_hex(m_objfile->GetCacheHash(), 10); + return strm.str(); +} + +void Symtab::SaveToCache() { + DataFileCache *cache = Module::GetIndexCache(); + if (!cache) + return; // Caching is not enabled. + InitNameIndexes(); // Init the name indexes so we can cache them as well. + const auto byte_order = endian::InlHostByteOrder(); + DataEncoder file(byte_order, /*addr_size=*/8); + // Encode will return false if the symbol table's object file doesn't have + // anything to make a signature from. + if (Encode(file)) + cache->SetCachedData(GetCacheKey(), file.GetData()); +} + +constexpr llvm::StringLiteral kIdentifierCStrMap("CMAP"); + +static void EncodeCStrMap(DataEncoder &encoder, ConstStringTable &strtab, + const UniqueCStringMap &cstr_map) { + encoder.AppendData(kIdentifierCStrMap); + encoder.AppendU32(cstr_map.GetSize()); + for (const auto &entry: cstr_map) { + // Make sure there are no empty strings. + assert((bool)entry.cstring); + encoder.AppendU32(strtab.Add(entry.cstring)); + encoder.AppendU32(entry.value); + } +} + +bool DecodeCStrMap(const DataExtractor &data, lldb::offset_t *offset_ptr, + const StringTableReader &strtab, + UniqueCStringMap &cstr_map) { + llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); + if (identifier != kIdentifierCStrMap) + return false; + const uint32_t count = data.GetU32(offset_ptr); + for (uint32_t i=0; i cstr_maps[num_cstr_maps] +bool Symtab::Encode(DataEncoder &encoder) const { + // Name indexes must be computed before calling this function. + assert(m_name_indexes_computed); + + // Encode the object file's signature + CacheSignature signature(m_objfile); + if (!signature.Encode(encoder)) + return false; + ConstStringTable strtab; + + // Encoder the symbol table into a separate encoder first. 
This allows us to + // gather all of the strings we will need in "strtab" as we will need to + // write the string table out before the symbol table. + DataEncoder symtab_encoder(encoder.GetByteOrder(), + encoder.GetAddressByteSize()); + symtab_encoder.AppendData(kIdentifierSymbolTable); + // Encode the symtab data version. + symtab_encoder.AppendU32(CURRENT_CACHE_VERSION); + // Encode the number of symbols. + symtab_encoder.AppendU32(m_symbols.size()); + // Encode the symbol data for all symbols. + for (const auto &symbol: m_symbols) + symbol.Encode(symtab_encoder, strtab); + + // Emit a byte for how many C string maps we emit. We will fix this up after + // we emit the C string maps since we skip emitting C string maps if they are + // empty. + size_t num_cmaps_offset = symtab_encoder.GetByteSize(); + uint8_t num_cmaps = 0; + symtab_encoder.AppendU8(0); + for (const auto &pair: m_name_to_symbol_indices) { + if (pair.second.IsEmpty()) + continue; + ++num_cmaps; + symtab_encoder.AppendU8(pair.first); + EncodeCStrMap(symtab_encoder, strtab, pair.second); + } + if (num_cmaps > 0) + symtab_encoder.PutU8(num_cmaps_offset, num_cmaps); + + // Now that all strings have been gathered, we will emit the string table. + strtab.Encode(encoder); + // Followed by the symbol table data. + encoder.AppendData(symtab_encoder.GetData()); + return true; +} + +bool Symtab::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, + bool &signature_mismatch) { + signature_mismatch = false; + CacheSignature signature; + StringTableReader strtab; + { // Scope for "elapsed" object below so it can measure the parse time. + ElapsedTime elapsed(m_objfile->GetModule()->GetSymtabParseTime()); + if (!signature.Decode(data, offset_ptr)) + return false; + if (CacheSignature(m_objfile) != signature) { + signature_mismatch = true; + return false; + } + // We now decode the string table for all strings in the data cache file.
+ if (!strtab.Decode(data, offset_ptr)) + return false; + + // And now we can decode the symbol table with string table we just decoded. + llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4); + if (identifier != kIdentifierSymbolTable) + return false; + const uint32_t version = data.GetU32(offset_ptr); + if (version != CURRENT_CACHE_VERSION) + return false; + const uint32_t num_symbols = data.GetU32(offset_ptr); + if (num_symbols == 0) + return true; + m_symbols.resize(num_symbols); + SectionList *sections = m_objfile->GetModule()->GetSectionList(); + for (uint32_t i=0; iGetModule()->GetSymtabIndexTime()); + const uint8_t num_cstr_maps = data.GetU8(offset_ptr); + for (uint8_t i=0; i &cstr_map = + GetNameToSymbolIndexMap((lldb::FunctionNameType)type); + if (!DecodeCStrMap(data, offset_ptr, strtab, cstr_map)) + return false; + } + m_name_indexes_computed = true; + } + return true; +} + +bool Symtab::LoadFromCache() { + DataFileCache *cache = Module::GetIndexCache(); + if (!cache) + return false; + + std::unique_ptr mem_buffer_up = + cache->GetCachedData(GetCacheKey()); + if (!mem_buffer_up) + return false; + DataExtractor data(mem_buffer_up->getBufferStart(), + mem_buffer_up->getBufferSize(), + m_objfile->GetByteOrder(), + m_objfile->GetAddressByteSize()); + bool signature_mismatch = false; + lldb::offset_t offset = 0; + const bool result = Decode(data, &offset, signature_mismatch); + if (signature_mismatch) + cache->RemoveCacheFile(GetCacheKey()); + return result; +} diff --git a/lldb/source/Utility/DataEncoder.cpp b/lldb/source/Utility/DataEncoder.cpp --- a/lldb/source/Utility/DataEncoder.cpp +++ b/lldb/source/Utility/DataEncoder.cpp @@ -172,6 +172,13 @@ m_data_sp->AppendData(bytes, length); } +void DataEncoder::AppendData(llvm::ArrayRef data) { + const uint8_t *bytes = data.data(); + const size_t length = data.size(); + if (bytes && length > 0) + m_data_sp->AppendData(bytes, length); +} + void DataEncoder::AppendCString(llvm::StringRef data) { 
const char *bytes = data.data(); const size_t length = data.size(); diff --git a/lldb/test/API/functionalities/module_cache/bsd/Makefile b/lldb/test/API/functionalities/module_cache/bsd/Makefile new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/bsd/Makefile @@ -0,0 +1,27 @@ +C_SOURCES := main.c a.c b.c c.c +EXE := # Define a.out explicitly +MAKE_DSYM := NO + +all: a.out + +a.out: main.o libfoo.a + $(LD) $(LDFLAGS) $^ -o $@ + +lib_ab.a: a.o b.o + $(AR) $(ARFLAGS) $@ $^ + $(RM) $^ + +# Here we make a .a file that has two a.o files with different modification +# times and different content by first creating libfoo.a with only a.o and b.o, +# then we sleep for 2 seconds, touch c.o to ensure it has a different +# modification time, and then rename c.o to a.o and then add it to the .a file +# again. This is to help test that the module cache will create different +# directories for the two different a.o files. +libfoo.a: lib_ab.a c.o + sleep 2 + touch c.o + mv c.o a.o + $(AR) $(ARFLAGS) $@ lib_ab.a a.o + $(RM) a.o + +include Makefile.rules diff --git a/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py b/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/bsd/TestModuleCacheBSD.py @@ -0,0 +1,85 @@ +"""Test the LLDB module cache funcionality.""" + +import glob +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os +import time + + +class ModuleCacheTestcaseBSD(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line number in a(int) to break at. 
+ self.line_a = line_number( + 'a.c', '// Set file and line breakpoint inside a().') + self.line_b = line_number( + 'b.c', '// Set file and line breakpoint inside b().') + self.line_c = line_number( + 'c.c', '// Set file and line breakpoint inside c().') + self.cache_dir = os.path.join(self.getBuildDir(), 'lldb-module-cache') + # Set the lldb module cache directory to a directory inside the build + # artifacts directory so no other tests are interfered with. + self.runCmd('settings set symbols.lldb-index-cache-path "%s"' % (self.cache_dir)) + self.runCmd('settings set symbols.enable-lldb-index-cache true') + self.build() + + + def get_module_cache_files(self, basename): + module_cache_glob = os.path.join(self.cache_dir, "llvmcache-*%s*symtab*" % (basename)) + return glob.glob(module_cache_glob) + + + # Requires no dSYM, so we let the Makefile make the right stuff for us + @no_debug_info_test + @skipUnlessDarwin + def test(self): + """ + Test module cache functionality for bsd archive object files. + + This will test that if we enable the module cache, we have a + corresponding cache entry for the .o files in libfoo.a. + + The static library has two entries for "a.o": + - one from a.c + - one from c.c which had c.o renamed to a.o and then put into the + libfoo.a as an extra .o file with different contents from the + original a.o + + We do this to test that we can correctly cache duplicate .o files + that appear in .a files. + + This test only works on darwin because of the way DWARF is stored + where the debug map will refer to .o files inside of .a files. + """ + exe = self.getBuildArtifact("a.out") + + # Create a module with no depedencies. 
+ target = self.createTestTarget(load_dependent_modules=False) + + self.runCmd('breakpoint set -f a.c -l %d' % (self.line_a)) + self.runCmd('breakpoint set -f b.c -l %d' % (self.line_b)) + self.runCmd('breakpoint set -f c.c -l %d' % (self.line_c)) + + # Get the executable module and get the number of symbols to make + # sure the symbol table gets parsed and cached. The module cache is + # enabled in the setUp() function. + main_module = target.GetModuleAtIndex(0) + self.assertTrue(main_module.IsValid()) + # Make sure the symbol table gets loaded and cached + main_module.GetNumSymbols() + a_o_cache_files = self.get_module_cache_files("libfoo.a(a.o)") + b_o_cache_files = self.get_module_cache_files("libfoo.a(b.o)") + # We expect the directory for a.o to have two cache directories: + # - 1 for the a.o with a earlier mod time + # - 1 for the a.o that was renamed from c.o that should be 2 seconds older + self.assertEqual(len(a_o_cache_files), 2, + "make sure there are two files in the module cache directory (%s) for libfoo.a(a.o)" % (self.cache_dir)) + self.assertEqual(len(b_o_cache_files), 1, + "make sure there are two files in the module cache directory (%s) for libfoo.a(b.o)" % (self.cache_dir)) diff --git a/lldb/test/API/functionalities/module_cache/bsd/a.c b/lldb/test/API/functionalities/module_cache/bsd/a.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/bsd/a.c @@ -0,0 +1,6 @@ +int __a_global = 1; + +int a(int arg) { + int result = arg + __a_global; + return result; // Set file and line breakpoint inside a(). +} diff --git a/lldb/test/API/functionalities/module_cache/bsd/b.c b/lldb/test/API/functionalities/module_cache/bsd/b.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/bsd/b.c @@ -0,0 +1,6 @@ +static int __b_global = 2; + +int b(int arg) { + int result = arg + __b_global; + return result; // Set file and line breakpoint inside b(). 
+} diff --git a/lldb/test/API/functionalities/module_cache/bsd/c.c b/lldb/test/API/functionalities/module_cache/bsd/c.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/bsd/c.c @@ -0,0 +1,6 @@ +static int __c_global = 3; + +int c(int arg) { + int result = arg + __c_global; + return result; // Set file and line breakpoint inside c(). +} diff --git a/lldb/test/API/functionalities/module_cache/bsd/main.c b/lldb/test/API/functionalities/module_cache/bsd/main.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/bsd/main.c @@ -0,0 +1,11 @@ +#include + +extern int a(int); +extern int b(int); +extern int c(int); +int main (int argc, char const *argv[]) +{ + printf ("a(1) returns %d\n", a(1)); + printf ("b(2) returns %d\n", b(2)); + printf ("c(2) returns %d\n", c(2)); +} diff --git a/lldb/test/API/functionalities/module_cache/simple_exe/Makefile b/lldb/test/API/functionalities/module_cache/simple_exe/Makefile new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/simple_exe/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py b/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/simple_exe/TestModuleCacheSimple.py @@ -0,0 +1,99 @@ +"""Test the LLDB module cache funcionality.""" + +import glob +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os +import time + + +class ModuleCacheTestcaseSimple(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Find the line number in a(int) to break at. 
+ self.cache_dir = os.path.join(self.getBuildDir(), 'lldb-module-cache') + # Set the lldb module cache directory to a directory inside the build + # artifacts directory so no other tests are interfered with. + self.runCmd('settings set symbols.lldb-index-cache-path "%s"' % (self.cache_dir)) + self.runCmd('settings set symbols.enable-lldb-index-cache true') + self.build() + + + def get_module_cache_files(self, basename): + module_file_glob = os.path.join(self.cache_dir, "llvmcache-*%s*" % (basename)) + return glob.glob(module_file_glob) + + # Doesn't depend on any specific debug information. + @no_debug_info_test + def test(self): + """ + Test module cache functionality for a simple object file. + + This will test that if we enable the module cache, we have a + corresponding index cache entry for the symbol table for the + executable. It also removes the executable, rebuilds so that the + modification time of the binary gets updated, and then creates a new + target and should cause the cache to get updated so the cache file + should get an updated modification time. + """ + exe = self.getBuildArtifact("a.out") + + # Create a module with no dependencies. + target = self.createTestTarget(load_dependent_modules=False) + + # Get the executable module and get the number of symbols to make + # sure the symbol table gets parsed and cached. The module cache is + # enabled in the setUp() function.
+ main_module = target.GetModuleAtIndex(0) + self.assertTrue(main_module.IsValid()) + # Make sure the symbol table gets loaded and cached + main_module.GetNumSymbols() + cache_files = self.get_module_cache_files("a.out") + self.assertEqual(len(cache_files), 1, + "make sure there is only one cache file for 'a.out'") + symtab_cache_path = cache_files[0] + exe_mtime_1 = os.path.getmtime(exe) + symtab_mtime_1 = os.path.getmtime(symtab_cache_path) + # Now remove the executable and sleep for a few seconds to make sure we + # get a different creation and modification time for the file since some + # OSs store the modification time in seconds since Jan 1, 1970. + os.remove(exe) + self.assertEqual(os.path.exists(exe), False, + 'make sure we were able to remove the executable') + time.sleep(2) + # Now rebuild the binary so it has a different content which should + # update the UUID to make the cache miss when it tries to load the + # symbol table from the binary at the same path. + self.build(dictionary={'CFLAGS_EXTRAS': '-DEXTRA_FUNCTION'}) + self.assertEqual(os.path.exists(exe), True, + 'make sure executable exists after rebuild') + # Make sure the modification time has changed or this test will fail. + exe_mtime_2 = os.path.getmtime(exe) + self.assertNotEqual( + exe_mtime_1, + exe_mtime_2, + "make sure the modification time of the executable has changed") + # Make sure the module cache still has an out of date cache with the + # same old modification time. 
+ self.assertEqual(symtab_mtime_1, + os.path.getmtime(symtab_cache_path), + "check that the 'symtab' cache file modification time doesn't match the executable modification time after rebuild") + # Create a new target and get the symbols again, and make sure the cache + # gets updated for the symbol table cache + target = self.createTestTarget(load_dependent_modules=False) + main_module = target.GetModuleAtIndex(0) + self.assertTrue(main_module.IsValid()) + main_module.GetNumSymbols() + self.assertEqual(os.path.exists(symtab_cache_path), True, + 'make sure "symtab" cache files exists after cache is updated') + symtab_mtime_2 = os.path.getmtime(symtab_cache_path) + self.assertNotEqual( + symtab_mtime_1, + symtab_mtime_2, + 'make sure modification time of "symtab-..." changed') diff --git a/lldb/test/API/functionalities/module_cache/simple_exe/main.c b/lldb/test/API/functionalities/module_cache/simple_exe/main.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/simple_exe/main.c @@ -0,0 +1,13 @@ +#ifdef EXTRA_FUNCTION +int foo(int i) { + return i*3; +} +#endif + +int main (int argc, char const *argv[]) { +#ifdef EXTRA_FUNCTION + return foo(argc); +#else + return 0; +#endif +} diff --git a/lldb/test/API/functionalities/module_cache/universal/Makefile b/lldb/test/API/functionalities/module_cache/universal/Makefile new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/universal/Makefile @@ -0,0 +1,20 @@ +EXE := testit + +include Makefile.rules + +all: testit + +testit: testit.x86_64 testit.arm64 + lipo -create -o testit $^ + +testit.arm64: testit.arm64.o + $(CC) -isysroot $(SDKROOT) -target arm64-apple-macosx10.9 -o testit.arm64 $< + +testit.x86_64: testit.x86_64.o + $(CC) -isysroot $(SDKROOT) -target x86_64-apple-macosx10.9 -o testit.x86_64 $< + +testit.arm64.o: main.c + $(CC) -isysroot $(SDKROOT) -g -O0 -target arm64-apple-macosx10.9 -c -o testit.arm64.o $< + +testit.x86_64.o: main.c + $(CC) -isysroot 
$(SDKROOT) -g -O0 -target x86_64-apple-macosx10.9 -c -o testit.x86_64.o $< diff --git a/lldb/test/API/functionalities/module_cache/universal/TestModuleCacheUniversal.py b/lldb/test/API/functionalities/module_cache/universal/TestModuleCacheUniversal.py new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/universal/TestModuleCacheUniversal.py @@ -0,0 +1,57 @@ +"""Test the LLDB module cache functionality for universal mach-o files.""" + +import glob +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os +import time + + +class ModuleCacheTestcaseUniversal(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + def setUp(self): + # Call super's setUp(). + TestBase.setUp(self) + # Build the path of the module cache directory. + self.cache_dir = os.path.join(self.getBuildDir(), 'lldb-module-cache') + # Set the lldb module cache directory to a directory inside the build + # artifacts directory so no other tests are interfered with. + self.runCmd('settings set symbols.lldb-index-cache-path "%s"' % (self.cache_dir)) + self.runCmd('settings set symbols.enable-lldb-index-cache true') + self.build() + + + def get_module_cache_files(self, basename): + module_file_glob = os.path.join(self.cache_dir, "llvmcache-*%s*" % (basename)) + return glob.glob(module_file_glob) + + + # Doesn't depend on any specific debug information. + @no_debug_info_test + @skipUnlessDarwin + @skipIfDarwinEmbedded # this test file assumes we're targeting an x86 system + def test(self): + """ + Test module cache functionality for universal mach-o files. + + This will test that if we enable the module cache, we can create + lldb module caches for each slice of a universal mach-o file and + they will each have a unique directory. + """ + exe_basename = "testit" + exe = self.getBuildArtifact(exe_basename) + + # Create a module with no dependencies.
+ self.runCmd('target create -d --arch x86_64 %s' % (exe)) + self.runCmd('image dump symtab %s' % (exe_basename)) + self.runCmd('target create -d --arch arm64 %s' % (exe)) + self.runCmd('image dump symtab %s' % (exe_basename)) + + cache_files = self.get_module_cache_files(exe_basename) + + self.assertEqual(len(cache_files), 2, + "make sure there are two files in the module cache directory (%s) for %s" % (self.cache_dir, exe_basename)) diff --git a/lldb/test/API/functionalities/module_cache/universal/main.c b/lldb/test/API/functionalities/module_cache/universal/main.c new file mode 100644 --- /dev/null +++ b/lldb/test/API/functionalities/module_cache/universal/main.c @@ -0,0 +1,3 @@ +int main(int argc, const char **argv) { + return 0; +} diff --git a/lldb/unittests/Symbol/CMakeLists.txt b/lldb/unittests/Symbol/CMakeLists.txt --- a/lldb/unittests/Symbol/CMakeLists.txt +++ b/lldb/unittests/Symbol/CMakeLists.txt @@ -1,6 +1,9 @@ add_lldb_unittest(SymbolTests LocateSymbolFileTest.cpp + MangledTest.cpp PostfixExpressionTest.cpp + SymbolTest.cpp + SymtabTest.cpp TestTypeSystem.cpp TestTypeSystemClang.cpp TestClangASTImporter.cpp @@ -9,6 +12,7 @@ TestLineEntry.cpp LINK_LIBS + lldbCore lldbHost lldbSymbol lldbUtilityHelpers diff --git a/lldb/unittests/Symbol/MangledTest.cpp b/lldb/unittests/Symbol/MangledTest.cpp new file mode 100644 --- /dev/null +++ b/lldb/unittests/Symbol/MangledTest.cpp @@ -0,0 +1,71 @@ +//===-- MangledTest.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Core/Mangled.h" +#include "lldb/Core/DataFileCache.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" + +#include "gtest/gtest.h" + +using namespace lldb; +using namespace lldb_private; + +static void EncodeDecode(const Mangled &object, ByteOrder byte_order) { + const uint8_t addr_size = 8; + DataEncoder file(byte_order, addr_size); + DataEncoder strtab_encoder(byte_order, addr_size); + ConstStringTable const_strtab; + + object.Encode(file, const_strtab); + + llvm::ArrayRef<uint8_t> bytes = file.GetData(); + DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); + + const_strtab.Encode(strtab_encoder); + llvm::ArrayRef<uint8_t> strtab_bytes = strtab_encoder.GetData(); + DataExtractor strtab_data(strtab_bytes.data(), strtab_bytes.size(), + byte_order, addr_size); + StringTableReader strtab_reader; + offset_t strtab_data_offset = 0; + ASSERT_EQ(strtab_reader.Decode(strtab_data, &strtab_data_offset), true); + + Mangled decoded_object; + offset_t data_offset = 0; + decoded_object.Decode(data, &data_offset, strtab_reader); + EXPECT_EQ(object, decoded_object); +} + +static void EncodeDecode(const Mangled &object) { + EncodeDecode(object, eByteOrderLittle); + EncodeDecode(object, eByteOrderBig); +} + +TEST(MangledTest, EncodeDecodeMangled) { + Mangled mangled; + // Test encoding and decoding an empty mangled object. + EncodeDecode(mangled); + + // Test encoding a mangled object that hasn't demangled its name yet. + mangled.SetMangledName(ConstString("_Z3fooi")); + EncodeDecode(mangled); + + // Test encoding a mangled object that has demangled its name by computing it.
+ mangled.GetDemangledName(); + // EncodeDecode(mangled); + + // Test encoding a mangled object that has just a demangled name + mangled.SetMangledName(ConstString()); + mangled.SetDemangledName(ConstString("hello")); + EncodeDecode(mangled); + + // Test encoding a mangled name that has both a mangled and demangled name + // that are not mangled/demangled counterparts of each other. + mangled.SetMangledName(ConstString("world")); + EncodeDecode(mangled); +} diff --git a/lldb/unittests/Symbol/SymbolTest.cpp b/lldb/unittests/Symbol/SymbolTest.cpp new file mode 100644 --- /dev/null +++ b/lldb/unittests/Symbol/SymbolTest.cpp @@ -0,0 +1,136 @@ +//===-- SymbolTest.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "lldb/Symbol/Symbol.h" +#include "lldb/Core/DataFileCache.h" +#include "lldb/Core/Section.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" + +#include "gtest/gtest.h" + +using namespace lldb; +using namespace lldb_private; + +static void EncodeDecode(const Symbol &object, const SectionList *sect_list, + ByteOrder byte_order) { + const uint8_t addr_size = 8; + DataEncoder file(byte_order, addr_size); + DataEncoder strtab_encoder(byte_order, addr_size); + ConstStringTable const_strtab; + object.Encode(file, const_strtab); + llvm::ArrayRef<uint8_t> bytes = file.GetData(); + DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); + + const_strtab.Encode(strtab_encoder); + llvm::ArrayRef<uint8_t> strtab_bytes = strtab_encoder.GetData(); + DataExtractor strtab_data(strtab_bytes.data(), strtab_bytes.size(), + byte_order, addr_size); + StringTableReader strtab_reader; + offset_t strtab_data_offset = 0; +
ASSERT_EQ(strtab_reader.Decode(strtab_data, &strtab_data_offset), true); + + Symbol decoded_object; + offset_t data_offset = 0; + decoded_object.Decode(data, &data_offset, sect_list, strtab_reader); + EXPECT_EQ(object, decoded_object); +} + +static void EncodeDecode(const Symbol &object, const SectionList *sect_list) { + EncodeDecode(object, sect_list, eByteOrderLittle); + EncodeDecode(object, sect_list, eByteOrderBig); +} + +TEST(SymbolTest, EncodeDecodeSymbol) { + + SectionSP sect_sp(new Section( + /*module_sp=*/ModuleSP(), + /*obj_file=*/nullptr, + /*sect_id=*/1, + /*name=*/ConstString(".text"), + /*sect_type=*/eSectionTypeCode, + /*file_vm_addr=*/0x1000, + /*vm_size=*/0x1000, + /*file_offset=*/0, + /*file_size=*/0, + /*log2align=*/5, + /*flags=*/0x10203040)); + + SectionList sect_list; + sect_list.AddSection(sect_sp); + + Symbol symbol( + /*symID=*/0x10203040, + /*name=*/"main", + /*type=*/eSymbolTypeCode, + /*bool external=*/false, + /*bool is_debug=*/false, + /*bool is_trampoline=*/false, + /*bool is_artificial=*/false, + /*section_sp=*/sect_sp, + /*offset=*/0x0, + /*size=*/0x100, + /*size_is_valid=*/true, + /*contains_linker_annotations=*/false, + /*flags=*/0x11223344); + + // Test encoding a symbol with an address. + EncodeDecode(symbol, &sect_list); + + // Test that encoding the bits in the bitfield works for all endianness + // combos.
+ + // Test Symbol.m_is_synthetic + symbol.SetIsSynthetic(true); + EncodeDecode(symbol, &sect_list); + symbol.SetIsSynthetic(false); + + // Test Symbol.m_is_debug + symbol.SetDebug(true); + EncodeDecode(symbol, &sect_list); + symbol.SetDebug(false); + + // Test Symbol.m_is_external + symbol.SetExternal(true); + EncodeDecode(symbol, &sect_list); + symbol.SetExternal(false); + + // Test Symbol.m_size_is_sibling + symbol.SetSizeIsSibling(true); + EncodeDecode(symbol, &sect_list); + symbol.SetSizeIsSibling(false); + + // Test Symbol.m_size_is_synthesized + symbol.SetSizeIsSynthesized(true); + EncodeDecode(symbol, &sect_list); + symbol.SetSizeIsSynthesized(false); + + // Test a symbol whose byte size is zero + symbol.SetByteSize(0); + EncodeDecode(symbol, &sect_list); + symbol.SetByteSize(0x100); + + // Test Symbol.m_demangled_is_synthesized + symbol.SetDemangledNameIsSynthesized(true); + EncodeDecode(symbol, &sect_list); + symbol.SetDemangledNameIsSynthesized(false); + + // Test Symbol.m_contains_linker_annotations + symbol.SetContainsLinkerAnnotations(true); + EncodeDecode(symbol, &sect_list); + symbol.SetContainsLinkerAnnotations(false); + + // Test Symbol.m_is_weak + symbol.SetIsWeak(true); + EncodeDecode(symbol, &sect_list); + symbol.SetIsWeak(false); + + // Test encoding a symbol with no address. + symbol.GetAddressRef().SetSection(SectionSP()); + EncodeDecode(symbol, &sect_list); +} diff --git a/lldb/unittests/Symbol/SymtabTest.cpp b/lldb/unittests/Symbol/SymtabTest.cpp new file mode 100644 --- /dev/null +++ b/lldb/unittests/Symbol/SymtabTest.cpp @@ -0,0 +1,305 @@ +//===-- SymtabTest.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/ObjectFile/Mach-O/ObjectFileMachO.h" +#include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h" +#include "Plugins/TypeSystem/Clang/TypeSystemClang.h" +#include "TestingSupport/SubsystemRAII.h" +#include "TestingSupport/TestUtilities.h" + +#include "lldb/Core/DataFileCache.h" +#include "lldb/Core/Module.h" +#include "lldb/Host/FileSystem.h" +#include "lldb/Host/HostInfo.h" +#include "lldb/Symbol/Symbol.h" +#include "lldb/Symbol/Symtab.h" +#include "lldb/Utility/DataEncoder.h" +#include "lldb/Utility/DataExtractor.h" + +#include <memory> + +#include "gtest/gtest.h" + +using namespace lldb; +using namespace lldb_private; + +class SymtabTest : public testing::Test { + SubsystemRAII<FileSystem, HostInfo, ObjectFileMachO, SymbolFileDWARF, + TypeSystemClang> + subsystem; +}; + +static void EncodeDecode(const Symtab &object, ByteOrder byte_order) { + const uint8_t addr_size = 8; + DataEncoder file(byte_order, addr_size); + + object.Encode(file); + llvm::ArrayRef<uint8_t> bytes = file.GetData(); + DataExtractor data(bytes.data(), bytes.size(), byte_order, addr_size); + Symtab decoded_object(object.GetObjectFile()); + offset_t data_offset = 0; + bool uuid_mismatch = false; + decoded_object.Decode(data, &data_offset, uuid_mismatch); + ASSERT_EQ(object.GetNumSymbols(), decoded_object.GetNumSymbols()); + for (size_t i = 0; i < object.GetNumSymbols(); ++i) + EXPECT_EQ(*object.SymbolAtIndex(i), *decoded_object.SymbolAtIndex(i)); +} + +static void EncodeDecode(const Symtab &object) { + EncodeDecode(object, eByteOrderLittle); + EncodeDecode(object, eByteOrderBig); +} + +TEST_F(SymtabTest, EncodeDecodeSymtab) { + + auto ExpectedFile = TestFile::fromYaml(R"( +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x2 + ncmds: 17 + sizeofcmds: 792 + flags: 0x200085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __PAGEZERO + vmaddr: 0 + vmsize:
4294967296 + fileoff: 0 + filesize: 0 + maxprot: 0 + initprot: 0 + nsects: 0 + flags: 0 + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __TEXT + vmaddr: 4294967296 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 2 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x100003F94 + size: 36 + offset: 0x3F94 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: FF8300D1E80300AA00008052FF1F00B9E81B00B9E10B00F9E20700F9FF830091C0035FD6 + - sectname: __unwind_info + segname: __TEXT + addr: 0x100003FB8 + size: 72 + offset: 0x3FB8 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 010000001C000000000000001C000000000000001C00000002000000943F00003400000034000000B93F00000000000034000000030000000C000100100001000000000000200002 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 4294983680 + vmsize: 16384 + fileoff: 16384 + filesize: 674 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_CHAINED_FIXUPS + cmdsize: 16 + dataoff: 16384 + datasize: 56 + - cmd: LC_DYLD_EXPORTS_TRIE + cmdsize: 16 + dataoff: 16440 + datasize: 48 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 16496 + nsyms: 10 + stroff: 16656 + strsize: 128 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 8 + iextdefsym: 8 + nextdefsym: 2 + iundefsym: 10 + nundefsym: 0 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_LOAD_DYLINKER + cmdsize: 32 + name: 12 + Content: '/usr/lib/dyld' + ZeroPadBytes: 7 + - cmd: LC_UUID + cmdsize: 24 + uuid: 1EECD2B8-16EA-3FEC-AB3C-F46139DBD0E2 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 786432 + sdk: 786432 + ntools: 1 + Tools: + - tool: 3 + version: 46596096 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 
+ - cmd: LC_MAIN + cmdsize: 24 + entryoff: 16276 + stacksize: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 48 + dylib: + name: 24 + timestamp: 2 + current_version: 78643968 + compatibility_version: 65536 + Content: '/usr/lib/libc++.1.dylib' + ZeroPadBytes: 1 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 85917696 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 16488 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 16496 + datasize: 0 + - cmd: LC_CODE_SIGNATURE + cmdsize: 16 + dataoff: 16784 + datasize: 274 +LinkEditData: + NameList: + - n_strx: 28 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 64 + n_type: 0x64 + n_sect: 0 + n_desc: 0 + n_value: 0 + - n_strx: 73 + n_type: 0x66 + n_sect: 0 + n_desc: 1 + n_value: 1639532873 + - n_strx: 1 + n_type: 0x2E + n_sect: 1 + n_desc: 0 + n_value: 4294983572 + - n_strx: 115 + n_type: 0x24 + n_sect: 1 + n_desc: 0 + n_value: 4294983572 + - n_strx: 1 + n_type: 0x24 + n_sect: 0 + n_desc: 0 + n_value: 36 + - n_strx: 1 + n_type: 0x4E + n_sect: 1 + n_desc: 0 + n_value: 36 + - n_strx: 1 + n_type: 0x64 + n_sect: 1 + n_desc: 0 + n_value: 0 + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 16 + n_value: 4294967296 + - n_strx: 22 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 4294983572 + StringTable: + - ' ' + - __mh_execute_header + - _main + - '/Users/gclayton/Documents/src/args/' + - main.cpp + - '/Users/gclayton/Documents/src/args/main.o' + - _main + - '' + - '' + - '' + - '' + - '' + - '' + - '' +... +)"); + + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + auto module_sp = std::make_shared<Module>(ExpectedFile->moduleSpec()); + ObjectFile *objfile = module_sp->GetObjectFile(); + ASSERT_NE(objfile, nullptr); + + // Test encoding and decoding an empty symbol table.
+ Symtab symtab(objfile); + symtab.PreloadSymbols(); + EncodeDecode(symtab); + + // Now encode and decode an actual symbol table from our yaml. + Symtab *module_symtab = module_sp->GetSymtab(); + ASSERT_NE(module_symtab, nullptr); + module_symtab->PreloadSymbols(); + EncodeDecode(*module_symtab); +}