diff --git a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h --- a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h @@ -113,6 +113,8 @@ return OS; } + llvm::support::endianness getByteOrder() const { return ByteOrder; } + private: FileWriter(const FileWriter &rhs) = delete; void operator=(const FileWriter &rhs) = delete; diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h --- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h +++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h @@ -90,6 +90,10 @@ uint32_t Name; ///< String table offset in the string table. std::optional OptLineTable; std::optional Inline; + /// If we encode a FunctionInfo during segmenting so we know its size, we can + /// cache that encoding here so we don't need to re-encode it when saving the + /// GSYM file. + SmallString<32> EncodingCache; FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0) : Range(Addr, Addr + Size), Name(N) {} @@ -140,6 +144,17 @@ /// function info that was successfully written into the stream. llvm::Expected encode(FileWriter &O) const; + /// Encode this function info into the internal byte cache and return the size + /// in bytes. + /// + /// When segmenting GSYM files we need to know how big each FunctionInfo will + /// encode into so we can generate segments of the right size. We don't want + /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes + /// and re-use them when calling FunctionInfo::encode(...). + /// + /// \returns The size in bytes of the FunctionInfo if it were to be encoded + /// into a byte stream, or zero if it is invalid or fails to encode. + uint64_t cacheEncoding(); /// Lookup an address within a FunctionInfo object's data stream. 
/// diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h --- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h +++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h @@ -137,6 +137,8 @@ StringTableBuilder StrTab; StringSet<> StringStorage; DenseMap FileEntryToIndex; + // Needed for mapping string offsets back to the string stored in \a StrTab. + DenseMap StringOffsetMap; std::vector Files; std::vector UUID; std::optional ValidTextRanges; @@ -145,6 +147,149 @@ bool Finalized = false; bool Quiet; + + /// Get the first function start address. + /// + /// \returns The start address of the first FunctionInfo or std::nullopt if + /// there are no function infos. + std::optional getFirstFunctionAddress() const; + + /// Get the last function address. + /// + /// \returns The start address of the last FunctionInfo once finalized, else + /// the end of the last address range, or std::nullopt if there are none. + std::optional getLastFunctionAddress() const; + + /// Get the base address to use for this GSYM file. + /// + /// \returns The base address to put into the header and to use when creating + /// the address offset table or std::nullopt if there are no valid + /// function infos and the base address wasn't specified. + std::optional getBaseAddress() const; + + /// Get the size of an address offset in the address offset table. + /// + /// GSYM files store offsets from the base address in the address offset table + /// and we store the size of the address offsets in the GSYM header. This + /// function will calculate the size in bytes of these address offsets based + /// on the current contents of the GSYM file. + /// + /// \returns The size in bytes of the address offsets. + uint8_t getAddressOffsetSize() const; + + /// Get the maximum address offset for the current address offset size. 
+ /// + /// This is used when creating the address offset table to ensure we have + /// values that are in range so we don't end up truncating address offsets + /// when creating GSYM files as the code evolves. + /// + /// \returns The maximum address offset value that will be encoded into a GSYM + /// file. + uint64_t getMaxAddressOffset() const; + + /// Calculate the byte size of the GSYM header and tables. + /// + /// This function will calculate the exact size in bytes of the encoded GSYM + /// for the following items: + /// - The GSYM header + /// - The Address offset table + /// - The Address info offset table + /// - The file table + /// - The string table + /// + /// This is used to help split GSYM files into segments. + /// + /// \returns Size in bytes of the GSYM header and tables. + uint64_t calculateHeaderAndTableSize() const; + + /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator. + /// + /// Copy the function info and only the needed files and strings and add a + /// converted FunctionInfo into this object. This is used to segment GSYM + /// files into separate files while only transferring the files and strings + /// that are needed from \a SrcGC. + /// + /// \param SrcGC The source gsym creator to copy from. + /// \param FuncInfoIdx The function info index within \a SrcGC to copy. + /// \returns The number of bytes it will take to encode the function info in + /// this GsymCreator. This helps calculate the size of the current GSYM + /// segment file. + uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx); + + /// Copy a string from \a SrcGC into this object. + /// + /// Copy a string from \a SrcGC by string table offset into this GSYM creator. + /// If a string has already been copied, the uniqued string table offset will + /// be returned, otherwise the string will be copied and a unique offset will + /// be returned. + /// + /// \param SrcGC The source gsym creator to copy from. 
+ /// \param StrOff The string table offset from \a SrcGC to copy. + /// \returns The new string table offset of the string within this object. + uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff); + + /// Copy a file from \a SrcGC into this object. + /// + /// Copy a file from \a SrcGC by file index into this GSYM creator. Files + /// consist of two string table entries, one for the directory and one for the + /// filename. This function copies any needed strings and ensures the file is + /// uniqued within this object. If a file already exists in this GSYM creator + /// the uniqued index will be returned, else the strings will be copied and + /// the new file index will be returned. + /// + /// \param SrcGC The source gsym creator to copy from. + /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A + /// file index of zero will always return zero as zero is a reserved file + /// index that means no file. + /// \returns The new file index of the file within this object. + uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx); + + /// Inserts a FileEntry into the file table. + /// + /// This is used to insert a file entry in a thread safe way into this object. + /// + /// \param FE A file entry object that contains valid string table offsets + /// from this object already. + uint32_t insertFileEntry(FileEntry FE); + + /// Fixup any string and file references by updating any file indexes and + /// string offsets in the InlineInfo parameter. + /// + /// When copying InlineInfo entries, we can simply make a copy of the object + /// and then fixup the files and strings for efficiency. + /// + /// \param SrcGC The source gsym creator to copy from. + /// \param II The inline info that contains file indexes and string offsets + /// that come from \a SrcGC. The entries will be updated by copying any files + /// and strings over into this object. 
+ void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II); + + /// Get the first function info address from this GSYM file. + /// + /// This is used to add a suffix to segmented GSYM files to indicate the first + /// address for the first function info within the file. + /// + /// \returns The first function info address. + uint64_t getFirstFunctionInfoAddress() const; + + /// Save this GSYM file into segments that are roughly \a SegmentSize in size. + /// + /// When segmented GSYM files are saved to disk, they will use \a Path as a + /// prefix and then have the first function info address appended to the path + /// when each segment is saved. Each segmented GSYM file has only the + /// strings and files that are needed to save the function infos that are in + /// each segment. These smaller files are easy to compress and download + /// separately and allow for efficient lookups with very large GSYM files and + /// segmenting them allows servers to download only the segments that are + /// needed. + /// + /// \param Path The path prefix to use when saving the GSYM files. + /// \param ByteOrder The endianness to use when saving the file. + /// \param SegmentSize The size in bytes to segment the GSYM file into. + llvm::Error saveSegments(StringRef Path, + llvm::support::endianness ByteOrder, + uint64_t SegmentSize) const; + public: GsymCreator(bool Quiet = false); @@ -152,8 +297,18 @@ /// /// \param Path The file path to save the GSYM file to. /// \param ByteOrder The endianness to use when saving the file. + /// \param SegmentSize The size in bytes to segment the GSYM file into. If + /// this option is set this function will create N segments + /// that are all around \a SegmentSize bytes in size. This + /// allows a very large GSYM file to be broken up into + /// shards. Each GSYM file will have its own file table, + /// and string table that only have the files and strings + /// needed for the shard. 
If this argument has no value, + /// a single GSYM file that contains all function + /// information will be created. /// \returns An error object that indicates success or failure of the save. - llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const; + llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder, + std::optional SegmentSize = std::nullopt) const; /// Encode a GSYM into the file writer stream at the current position. /// @@ -291,6 +446,28 @@ /// Whether the transformation should be quiet, i.e. not output warnings. bool isQuiet() const { return Quiet; } + + + /// Create a segmented GSYM creator starting with function info index + /// \a FuncIdx. + /// + /// This function will create a GsymCreator object that will encode into + /// roughly \a SegmentSize bytes and return it. It is used by the private + /// saveSegments(...) function and also is used by the GSYM unit tests to test + /// segmenting of GSYM files. The returned GsymCreator can be finalized and + /// encoded. + /// + /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file + /// into. + /// \param [in,out] FuncIdx The index of the first function info to encode + /// into the returned GsymCreator. This index will be updated so it can be + /// used in subsequent calls to this function to allow more segments to be + /// created. + /// \returns An expected unique pointer to a GsymCreator or an error. The + /// returned unique pointer can be NULL if there are no more functions to + /// encode. 
+ llvm::Expected> + createSegment(uint64_t SegmentSize, size_t &FuncIdx) const; }; } // namespace gsym diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h --- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h +++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h @@ -52,6 +52,16 @@ std::string getSourceFile(uint32_t Index) const; }; +inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) { + if (LHS.LookupAddr != RHS.LookupAddr) + return false; + if (LHS.FuncRange != RHS.FuncRange) + return false; + if (LHS.FuncName != RHS.FuncName) + return false; + return LHS.Locations == RHS.Locations; +} + raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R); } // namespace gsym diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -96,57 +96,83 @@ return std::move(FI); } -llvm::Expected FunctionInfo::encode(FileWriter &O) const { +uint64_t FunctionInfo::cacheEncoding() { + EncodingCache.clear(); + if (!isValid()) + return 0; + raw_svector_ostream OutStrm(EncodingCache); + FileWriter FW(OutStrm, support::endian::system_endianness()); + llvm::Expected Result = encode(FW); + if (!Result) { + EncodingCache.clear(); + consumeError(Result.takeError()); + return 0; + } + return EncodingCache.size(); +} + +llvm::Expected FunctionInfo::encode(FileWriter &Out) const { if (!isValid()) return createStringError(std::errc::invalid_argument, "attempted to encode invalid FunctionInfo object"); // Align FunctionInfo data to a 4 byte alignment. - O.alignTo(4); - const uint64_t FuncInfoOffset = O.tell(); + Out.alignTo(4); + const uint64_t FuncInfoOffset = Out.tell(); + // Check if we have already encoded this function info into EncodingCache. 
+ // This will be non empty when creating segmented GSYM files as we need to + // precompute exactly how big FunctionInfo objects encode into so we can + // accurately make segments of a specific size. + if (!EncodingCache.empty() && + support::endian::system_endianness() == Out.getByteOrder()) { + // We already encoded this object, just write out the bytes. + Out.writeData(llvm::ArrayRef((const uint8_t *)EncodingCache.data(), + EncodingCache.size())); + return FuncInfoOffset; + } // Write the size in bytes of this function as a uint32_t. This can be zero // if we just have a symbol from a symbol table and that symbol has no size. - O.writeU32(size()); + Out.writeU32(size()); // Write the name of this function as a uint32_t string table offset. - O.writeU32(Name); + Out.writeU32(Name); if (OptLineTable) { - O.writeU32(InfoType::LineTableInfo); + Out.writeU32(InfoType::LineTableInfo); // Write a uint32_t length as zero for now, we will fix this up after // writing the LineTable out with the number of bytes that were written. - O.writeU32(0); - const auto StartOffset = O.tell(); - llvm::Error err = OptLineTable->encode(O, Range.start()); + Out.writeU32(0); + const auto StartOffset = Out.tell(); + llvm::Error err = OptLineTable->encode(Out, Range.start()); if (err) return std::move(err); - const auto Length = O.tell() - StartOffset; + const auto Length = Out.tell() - StartOffset; if (Length > UINT32_MAX) return createStringError(std::errc::invalid_argument, "LineTable length is greater than UINT32_MAX"); // Fixup the size of the LineTable data with the correct size. - O.fixup32(static_cast(Length), StartOffset - 4); + Out.fixup32(static_cast(Length), StartOffset - 4); } // Write out the inline function info if we have any and if it is valid. if (Inline) { - O.writeU32(InfoType::InlineInfo); + Out.writeU32(InfoType::InlineInfo); // Write a uint32_t length as zero for now, we will fix this up after // writing the LineTable out with the number of bytes that were written. 
- O.writeU32(0); - const auto StartOffset = O.tell(); - llvm::Error err = Inline->encode(O, Range.start()); + Out.writeU32(0); + const auto StartOffset = Out.tell(); + llvm::Error err = Inline->encode(Out, Range.start()); if (err) return std::move(err); - const auto Length = O.tell() - StartOffset; + const auto Length = Out.tell() - StartOffset; if (Length > UINT32_MAX) return createStringError(std::errc::invalid_argument, "InlineInfo length is greater than UINT32_MAX"); // Fixup the size of the InlineInfo data with the correct size. - O.fixup32(static_cast(Length), StartOffset - 4); + Out.fixup32(static_cast(Length), StartOffset - 4); } // Terminate the data chunks with and end of list with zero size - O.writeU32(InfoType::EndOfList); - O.writeU32(0); + Out.writeU32(InfoType::EndOfList); + Out.writeU32(0); return FuncInfoOffset; } diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -34,8 +34,10 @@ // requirements. const uint32_t Dir = insertString(directory); const uint32_t Base = insertString(filename); - FileEntry FE(Dir, Base); + return insertFileEntry(FileEntry(Dir, Base)); +} +uint32_t GsymCreator::insertFileEntry(FileEntry FE) { std::lock_guard Guard(Mutex); const auto NextIndex = Files.size(); // Find FE in hash map and insert if not present. @@ -45,8 +47,26 @@ return R.first->second; } +uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) { + // File index zero is reserved for a FileEntry with no directory and no + // filename. Any other file and we need to copy the strings for the directory + // and filename. + if (FileIdx == 0) + return 0; + const FileEntry SrcFE = SrcGC.Files[FileIdx]; + // Copy the strings for the file and then add the newly converted file entry. 
+ uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second); + uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second); + FileEntry DstFE(Dir, Base); + return insertFileEntry(DstFE); +} + + llvm::Error GsymCreator::save(StringRef Path, - llvm::support::endianness ByteOrder) const { + llvm::support::endianness ByteOrder, + std::optional SegmentSize) const { + if (SegmentSize) + return saveSegments(Path, ByteOrder, *SegmentSize); std::error_code EC; raw_fd_ostream OutStrm(Path, EC); if (EC) @@ -68,16 +88,17 @@ return createStringError(std::errc::invalid_argument, "too many FunctionInfos"); - const uint64_t MinAddr = - BaseAddress ? *BaseAddress : Funcs.front().startAddress(); - const uint64_t MaxAddr = Funcs.back().startAddress(); - const uint64_t AddrDelta = MaxAddr - MinAddr; + std::optional BaseAddress = getBaseAddress(); + // Base address should be valid if we have any functions. + if (!BaseAddress) + return createStringError(std::errc::invalid_argument, + "invalid base address"); Header Hdr; Hdr.Magic = GSYM_MAGIC; Hdr.Version = GSYM_VERSION; - Hdr.AddrOffSize = 0; + Hdr.AddrOffSize = getAddressOffsetSize(); Hdr.UUIDSize = static_cast(UUID.size()); - Hdr.BaseAddress = MinAddr; + Hdr.BaseAddress = *BaseAddress; Hdr.NumAddresses = static_cast(Funcs.size()); Hdr.StrtabOffset = 0; // We will fix this up later. Hdr.StrtabSize = 0; // We will fix this up later. @@ -85,15 +106,6 @@ if (UUID.size() > sizeof(Hdr.UUID)) return createStringError(std::errc::invalid_argument, "invalid UUID size %u", (uint32_t)UUID.size()); - // Set the address offset size correctly in the GSYM header. - if (AddrDelta <= UINT8_MAX) - Hdr.AddrOffSize = 1; - else if (AddrDelta <= UINT16_MAX) - Hdr.AddrOffSize = 2; - else if (AddrDelta <= UINT32_MAX) - Hdr.AddrOffSize = 4; - else - Hdr.AddrOffSize = 8; // Copy the UUID value if we have one. 
if (UUID.size() > 0) memcpy(Hdr.UUID, UUID.data(), UUID.size()); @@ -102,10 +114,16 @@ if (Err) return Err; + const uint64_t MaxAddressOffset = getMaxAddressOffset(); // Write out the address offsets. O.alignTo(Hdr.AddrOffSize); for (const auto &FuncInfo : Funcs) { uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; + // Make sure we calculated the address offsets byte size correctly by + // verifying the current address offset is within ranges. We have seen bugs + // introduced when the code changes that can cause problems here so it is + // good to catch this during testing. + assert(AddrOffset <= MaxAddressOffset); switch (Hdr.AddrOffSize) { case 1: O.writeU8(static_cast(AddrOffset)); @@ -142,7 +160,7 @@ O.writeU32(File.Base); } - // Write out the sting table. + // Write out the string table. const off_t StrtabOffset = O.tell(); StrTab.write(O.get_stream()); const off_t StrtabSize = O.tell() - StrtabOffset; @@ -300,6 +318,13 @@ return Error::success(); } +uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) { + // String offset at zero is always the empty string, no copying needed. + if (StrOff == 0) + return 0; + return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second); +} + uint32_t GsymCreator::insertString(StringRef S, bool Copy) { if (S.empty()) return 0; @@ -318,7 +343,13 @@ CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), CHStr.hash()}; } - return StrTab.add(CHStr); + const uint32_t StrOff = StrTab.add(CHStr); + // Save a mapping of string offsets to the cached string reference in case + // we need to segment the GSYM file and copy string from one string table to + // another. 
+ if (StringOffsetMap.count(StrOff) == 0) + StringOffsetMap.insert(std::make_pair(StrOff, CHStr)); + return StrOff; } void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { @@ -360,3 +391,187 @@ std::lock_guard Guard(Mutex); return Ranges.contains(Addr); } + +std::optional GsymCreator::getFirstFunctionAddress() const { + if (Finalized && !Funcs.empty()) + return std::optional(Funcs.front().startAddress()); + // This code gets used by the segmentation of GSYM files to help determine the + // size of the GSYM header while continually adding new FunctionInfo objects + // to this object, so we haven't finalized this object yet. + if (Ranges.empty()) + return std::nullopt; + return std::optional(Ranges.begin()->start()); +} + +std::optional GsymCreator::getLastFunctionAddress() const { + if (Finalized && !Funcs.empty()) + return std::optional(Funcs.back().startAddress()); + // This code gets used by the segmentation of GSYM files to help determine the + // size of the GSYM header while continually adding new FunctionInfo objects + // to this object, so we haven't finalized this object yet. 
+ if (Ranges.empty()) + return std::nullopt; + return std::optional((Ranges.end() - 1)->end()); +} + +std::optional GsymCreator::getBaseAddress() const { + if (BaseAddress) + return BaseAddress; + return getFirstFunctionAddress(); +} + +uint64_t GsymCreator::getMaxAddressOffset() const { + switch (getAddressOffsetSize()) { + case 1: return UINT8_MAX; + case 2: return UINT16_MAX; + case 4: return UINT32_MAX; + case 8: return UINT64_MAX; + } + llvm_unreachable("invalid address offset"); +} + +uint8_t GsymCreator::getAddressOffsetSize() const { + const std::optional BaseAddress = getBaseAddress(); + const std::optional LastFuncAddr = getLastFunctionAddress(); + if (BaseAddress && LastFuncAddr) { + const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress; + if (AddrDelta <= UINT8_MAX) + return 1; + else if (AddrDelta <= UINT16_MAX) + return 2; + else if (AddrDelta <= UINT32_MAX) + return 4; + return 8; + } + return 1; +} + +uint64_t GsymCreator::calculateHeaderAndTableSize() const { + uint64_t Size = sizeof(Header); + const size_t NumFuncs = Funcs.size(); + // Add size of address offset table + Size += NumFuncs * getAddressOffsetSize(); + // Add size of address info offsets which are 32 bit integers in version 1. + Size += NumFuncs * sizeof(uint32_t); + // Add file table size + Size += Files.size() * sizeof(FileEntry); + // Add string table size + Size += StrTab.getSize(); + + return Size; +} + +// This function takes a InlineInfo class that was copy constructed from an +// InlineInfo from the \a SrcGC and updates all members that point to strings +// and files to point to strings and files from this GsymCreator. 
+void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) { + II.Name = copyString(SrcGC, II.Name); + II.CallFile = copyFile(SrcGC, II.CallFile); + for (auto &ChildII: II.Children) + fixupInlineInfo(SrcGC, ChildII); +} + +uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) { + // To copy a function info we need to copy any files and strings over into + // this GsymCreator and then copy the function info and update the string + // table offsets to match the new offsets. + const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx]; + Ranges.insert(SrcFI.Range); + + FunctionInfo DstFI; + DstFI.Range = SrcFI.Range; + DstFI.Name = copyString(SrcGC, SrcFI.Name); + // Copy the line table if there is one. + if (SrcFI.OptLineTable) { + // Copy the entire line table. + DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value()); + // Fixup all LineEntry::File entries which are indexes in the the file table + // from SrcGC and must be converted to file indexes from this GsymCreator. + LineTable &DstLT = DstFI.OptLineTable.value(); + const size_t NumLines = DstLT.size(); + for (size_t I=0; I Guard(Mutex); + Funcs.push_back(DstFI); + return Funcs.back().cacheEncoding(); +} + +llvm::Error GsymCreator::saveSegments(StringRef Path, + llvm::support::endianness ByteOrder, + uint64_t SegmentSize) const { + if (SegmentSize == 0) + return createStringError(std::errc::invalid_argument, + "invalid segment size zero"); + + size_t FuncIdx = 0; + const size_t NumFuncs = Funcs.size(); + while (FuncIdx < NumFuncs) { + llvm::Expected> ExpectedGC = + createSegment(SegmentSize, FuncIdx); + if (ExpectedGC) { + GsymCreator *GC = ExpectedGC->get(); + if (GC == NULL) + break; // We had not more functions to encode. 
+ raw_null_ostream ErrorStrm; + llvm::Error Err = GC->finalize(ErrorStrm); + if (Err) + return Err; + std::string SegmentedGsymPath; + raw_string_ostream SGP(SegmentedGsymPath); + std::optional FirstFuncAddr = GC->getFirstFunctionAddress(); + if (FirstFuncAddr) { + SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1); + SGP.flush(); + Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt); + if (Err) + return Err; + } + } else { + return ExpectedGC.takeError(); + } + } + return Error::success(); +} + +llvm::Expected> +GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const { + // No function entries, return empty unique pointer + if (FuncIdx >= Funcs.size()) + return std::unique_ptr(); + + std::unique_ptr GC(new GsymCreator(/*Quiet=*/true)); + // Set the base address if there is one. + if (BaseAddress) + GC->setBaseAddress(*BaseAddress); + // Copy the UUID value from this object into the new creator. + GC->setUUID(UUID); + const size_t NumFuncs = Funcs.size(); + // Track how big the function infos are for the current segment so we can + // emit segments that are close to the requested size. It is quick math to + // determine the current header and tables sizes, so we can do that each loop. 
+ uint64_t SegmentFuncInfosSize = 0; + for (; FuncIdx < NumFuncs; ++FuncIdx) { + const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize(); + if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) { + if (SegmentFuncInfosSize == 0) + return createStringError(std::errc::invalid_argument, + "a segment size of %" PRIu64 " is to small to " + "fit any function infos, specify a larger value", + SegmentSize); + + break; + } + SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4); + } + return std::move(GC); +} diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp --- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp +++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,13 @@ "number of cores on the current machine."), cl::value_desc("n"), cat(ConversionOptions)); +static opt + SegmentSize("segment-size", + desc("Specify the size in bytes of the size the final GSYM file " + "should be segmented into. This allows GSYM files to be " + "split across multiple files."), + cl::value_desc("s"), cat(ConversionOptions)); + static opt Quiet("quiet", desc("Do not output warnings about the debug information"), cat(ConversionOptions)); @@ -310,7 +318,11 @@ // Save the GSYM file to disk. support::endianness Endian = Obj.makeTriple().isLittleEndian() ? support::little : support::big; - if (auto Err = Gsym.save(OutFile, Endian)) + + std::optional OptSegmentSize; + if (SegmentSize > 0) + OptSegmentSize = SegmentSize; + if (auto Err = Gsym.save(OutFile, Endian, OptSegmentSize)) return Err; // Verify the DWARF if requested. 
This will ensure all the info in the DWARF diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp --- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp +++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -2443,3 +2443,211 @@ 1, // NumAddresses ArrayRef(UUID)); } + +// Helper function to quickly create a FunctionInfo in a GsymCreator for testing. +static void AddFunctionInfo(GsymCreator &GC, const char *FuncName, + uint64_t FuncAddr, const char *SourcePath, + const char *HeaderPath) { + FunctionInfo FI(FuncAddr, 0x30, GC.insertString(FuncName)); + FI.OptLineTable = LineTable(); + const uint32_t SourceFileIdx = GC.insertFile(SourcePath); + const uint32_t HeaderFileIdx = GC.insertFile(HeaderPath); + FI.OptLineTable->push(LineEntry(FuncAddr+0x00, SourceFileIdx, 5)); + FI.OptLineTable->push(LineEntry(FuncAddr+0x10, HeaderFileIdx, 10)); + FI.OptLineTable->push(LineEntry(FuncAddr+0x12, HeaderFileIdx, 20)); + FI.OptLineTable->push(LineEntry(FuncAddr+0x14, HeaderFileIdx, 11)); + FI.OptLineTable->push(LineEntry(FuncAddr+0x16, HeaderFileIdx, 30)); + FI.OptLineTable->push(LineEntry(FuncAddr+0x18, HeaderFileIdx, 12)); + FI.OptLineTable->push(LineEntry(FuncAddr+0x20, SourceFileIdx, 8)); + FI.Inline = InlineInfo(); + + std::string InlineName1(FuncName); InlineName1.append("1"); + std::string InlineName2(FuncName); InlineName2.append("2"); + std::string InlineName3(FuncName); InlineName3.append("3"); + + FI.Inline->Name = GC.insertString(InlineName1); + FI.Inline->CallFile = SourceFileIdx; + FI.Inline->CallLine = 6; + FI.Inline->Ranges.insert(AddressRange(FuncAddr + 0x10, FuncAddr + 0x20)); + InlineInfo Inline2; + Inline2.Name = GC.insertString(InlineName2); + Inline2.CallFile = HeaderFileIdx; + Inline2.CallLine = 33; + Inline2.Ranges.insert(AddressRange(FuncAddr + 0x12, FuncAddr + 0x14)); + FI.Inline->Children.emplace_back(Inline2); + InlineInfo Inline3; + Inline3.Name = GC.insertString(InlineName3); + Inline3.CallFile = HeaderFileIdx; + 
Inline3.CallLine = 35;
+  Inline3.Ranges.insert(AddressRange(FuncAddr + 0x16, FuncAddr + 0x18));
+  FI.Inline->Children.emplace_back(Inline3);
+  GC.addFunctionInfo(std::move(FI));
+}
+
+/// Finalize \p GC, encode it into an in-memory buffer using the host byte
+/// order and decode that buffer again.
+///
+/// \returns The GsymReader that was successfully decoded, or the first error
+/// reported while finalizing, encoding or decoding.
+static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
+  // std::move converts the Error into the Expected return value.
+  if (Error FinalizeErr = GC.finalize(llvm::nulls()))
+    return std::move(FinalizeErr);
+  SmallString<1024> Str;
+  raw_svector_ostream OutStrm(Str);
+  const auto ByteOrder = support::endian::system_endianness();
+  FileWriter FW(OutStrm, ByteOrder);
+  if (llvm::Error EncodeErr = GC.encode(FW))
+    return std::move(EncodeErr);
+  return GsymReader::copyBuffer(OutStrm.str());
+}
+
+TEST(GSYMTest, TestGsymSegmenting) {
+  // Test creating a GSYM file with function infos and segment the information.
+  // We verify segmenting is working by creating a full GSYM and also by
+  // encoding multiple segments, then we verify that we get the same information
+  // when doing lookups on the full GSYM that was decoded from encoding the
+  // entire GSYM and also by decoding information from the segments themselves.
+  GsymCreator GC;
+  GC.setBaseAddress(0);
+  AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
+  AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
+  AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
+  AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
+  Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
+  ASSERT_THAT_EXPECTED(GR, Succeeded());
+
+  // Create segmented GSYM files where each file contains 1 function. We will
+  // then test doing lookups on the "GR", or the full GSYM file and then test
+  // doing lookups on the GsymReader objects for each segment to ensure we get
+  // the exact same information. So after all of the code below we will have
+  // GsymReader objects that each contain one function. We name the creators
+  // and readers to match the one and only address they contain.
+  // GC1000 and GR1000 are for [0x1000-0x1030)
+  // GC2000 and GR2000 are for [0x2000-0x2030)
+  // GC3000 and GR3000 are for [0x3000-0x3030)
+  // GC4000 and GR4000 are for [0x4000-0x4030)
+
+  // Create the segments and verify that FuncIdx, an in/out parameter, gets
+  // updated as expected.
+  size_t FuncIdx = 0;
+  // Make sure we get an error if the segment size is too small to encode a
+  // single function info.
+  // NOTE(review): the expected text below (including its "to small" spelling)
+  // presumably has to match the message produced by createSegment verbatim;
+  // keep the two in sync.
+  llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
+      GC.createSegment(57, FuncIdx);
+  ASSERT_FALSE(static_cast<bool>(GCError));
+  checkError("a segment size of 57 is to small to fit any function infos, "
+             "specify a larger value", GCError.takeError());
+  // Make sure that the function index didn't get incremented when we didn't
+  // encode any values into the segmented GsymCreator.
+  ASSERT_EQ(FuncIdx, 0u);
+
+  // Segment size used for the remaining segments. Each call below is expected
+  // to consume exactly one function info, so FuncIdx advances by one per call.
+  constexpr uint64_t SegmentSize = 128;
+  llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
+      GC.createSegment(SegmentSize, FuncIdx);
+  ASSERT_THAT_EXPECTED(GC1000, Succeeded());
+  ASSERT_EQ(FuncIdx, 1u);
+  llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
+      GC.createSegment(SegmentSize, FuncIdx);
+  ASSERT_THAT_EXPECTED(GC2000, Succeeded());
+  ASSERT_EQ(FuncIdx, 2u);
+  llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
+      GC.createSegment(SegmentSize, FuncIdx);
+  ASSERT_THAT_EXPECTED(GC3000, Succeeded());
+  ASSERT_EQ(FuncIdx, 3u);
+  llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
+      GC.createSegment(SegmentSize, FuncIdx);
+  ASSERT_THAT_EXPECTED(GC4000, Succeeded());
+  ASSERT_EQ(FuncIdx, 4u);
+  // When there are no function infos left to encode we expect to get no error
+  // and get a NULL GsymCreator in the return value from createSegment.
+  llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
+      GC.createSegment(SegmentSize, FuncIdx);
+  ASSERT_THAT_EXPECTED(GCNull, Succeeded());
+  ASSERT_TRUE(GC1000.get() != nullptr);
+  ASSERT_TRUE(GC2000.get() != nullptr);
+  ASSERT_TRUE(GC3000.get() != nullptr);
+  ASSERT_TRUE(GC4000.get() != nullptr);
+  ASSERT_TRUE(GCNull.get() == nullptr);
+
+  // Encode and decode the GsymReader for each segment and verify they succeed.
+  Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+  ASSERT_THAT_EXPECTED(GR1000, Succeeded());
+  Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+  ASSERT_THAT_EXPECTED(GR2000, Succeeded());
+  Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+  ASSERT_THAT_EXPECTED(GR3000, Succeeded());
+  Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+  ASSERT_THAT_EXPECTED(GR4000, Succeeded());
+
+  // Table of the segment readers together with the start address of the one
+  // function each of them contains. Name is only used to label failures.
+  struct SegmentReader {
+    const char *Name;
+    uint64_t StartAddr;
+    GsymReader *Reader;
+  };
+  const SegmentReader Segments[] = {
+      {"GR1000", 0x1000, &GR1000.get()},
+      {"GR2000", 0x2000, &GR2000.get()},
+      {"GR3000", 0x3000, &GR3000.get()},
+      {"GR4000", 0x4000, &GR4000.get()},
+  };
+  // Number of addresses checked per function: [StartAddr, StartAddr + 0x30).
+  constexpr uint64_t FuncSize = 0x30;
+
+  for (const SegmentReader &Seg : Segments) {
+    // All segments share this loop body, so tag failures with the reader name.
+    SCOPED_TRACE(Seg.Name);
+
+    // Make sure that the lookups on the functions that are not in the segment
+    // fail as expected. These do not depend on the address being verified
+    // below, so they are checked once per segment.
+    for (const SegmentReader &Other : Segments) {
+      if (&Other == &Seg)
+        continue;
+      ASSERT_THAT_EXPECTED(Seg.Reader->lookup(Other.StartAddr), Failed());
+    }
+
+    // Verify that all lookups in the function's address range match when
+    // doing lookups in the GsymReader that contains all functions and in the
+    // segmented GsymReader.
+    const uint64_t EndAddr = Seg.StartAddr + FuncSize;
+    for (uint64_t Addr = Seg.StartAddr; Addr < EndAddr; ++Addr) {
+      // Lookup in the main GsymReader that contains all function infos.
+      auto MainLR = GR->lookup(Addr);
+      ASSERT_THAT_EXPECTED(MainLR, Succeeded());
+      // Lookup in the GsymReader for the segment that contains Addr.
+      auto SegmentLR = Seg.Reader->lookup(Addr);
+      ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
+      // Make sure the lookup results match.
+      EXPECT_EQ(MainLR.get(), SegmentLR.get());
+    }
+  }
+}