Index: include/llvm/DebugInfo/GSYM/FileWriter.h
===================================================================
--- include/llvm/DebugInfo/GSYM/FileWriter.h
+++ include/llvm/DebugInfo/GSYM/FileWriter.h
@@ -0,0 +1,128 @@
+//===- FileWriter.h ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H
+#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+namespace llvm {
+class raw_pwrite_stream;
+
+namespace gsym {
+
+/// A simplified binary data writer class that doesn't require targets, target
+/// definitions, architectures, or require any other optional compile time
+/// libraries to be enabled via the build process. This class needs the ability
+/// to seek to different spots in the binary stream that it produces to fixup
+/// offsets and sizes.
+class FileWriter {
+  llvm::raw_pwrite_stream &OS;
+  llvm::support::endianness ByteOrder;
+
+public:
+  /// Create a writer that emits into S and byte swaps fixed width integers
+  /// as needed to match the byte order B.
+  FileWriter(llvm::raw_pwrite_stream &S, llvm::support::endianness B)
+      : OS(S), ByteOrder(B) {}
+
+  /// Flush the stream that was supplied at construction.
+  ~FileWriter();
+
+  // A writer is bound to one stream by reference, so copying is disabled.
+  FileWriter(const FileWriter &) = delete;
+  FileWriter &operator=(const FileWriter &) = delete;
+
+  /// Write a single uint8_t value into the stream at the current file
+  /// position.
+  ///
+  /// \param Value The value to write into the stream.
+  void writeU8(uint8_t Value);
+
+  /// Write a single uint16_t value into the stream at the current file
+  /// position. The value will be byte swapped if needed to match the byte
+  /// order specified during construction.
+  ///
+  /// \param Value The value to write into the stream.
+  void writeU16(uint16_t Value);
+
+  /// Write a single uint32_t value into the stream at the current file
+  /// position. The value will be byte swapped if needed to match the byte
+  /// order specified during construction.
+  ///
+  /// \param Value The value to write into the stream.
+  void writeU32(uint32_t Value);
+
+  /// Write a single uint64_t value into the stream at the current file
+  /// position. The value will be byte swapped if needed to match the byte
+  /// order specified during construction.
+  ///
+  /// \param Value The value to write into the stream.
+  void writeU64(uint64_t Value);
+
+  /// Write the value into the stream encoded using signed LEB128 at the
+  /// current file position.
+  ///
+  /// \param Value The value to write into the stream.
+  void writeSLEB(int64_t Value);
+
+  /// Write the value into the stream encoded using unsigned LEB128 at the
+  /// current file position.
+  ///
+  /// \param Value The value to write into the stream.
+  void writeULEB(uint64_t Value);
+
+  /// Write an array of uint8_t values into the stream at the current file
+  /// position.
+  ///
+  /// \param Data An array of values to write into the stream.
+  void writeData(llvm::ArrayRef<uint8_t> Data);
+
+  /// Write a NULL terminated C string into the stream at the current file
+  /// position. The entire contents of Str will be written into the stream at
+  /// the current file position and then an extra NULL termination byte will be
+  /// written. It is up to the user to ensure that Str doesn't contain any NULL
+  /// characters unless the additional NULL characters are desired.
+  ///
+  /// \param Str The value to write into the stream.
+  void writeNullTerminated(llvm::StringRef Str);
+
+  /// Fixup a uint32_t value at the specified offset in the stream. The four
+  /// bytes at Offset are overwritten with Value, byte swapped if needed to
+  /// match the byte order specified during construction. The current file
+  /// position is the same after the call as it was before it.
+  ///
+  /// \param Value The value to write into the stream.
+  /// \param Offset The offset at which to write the Value within the stream.
+  void fixup32(uint32_t Value, uint64_t Offset);
+
+  /// Pad with zeroes at the current file position until the current file
+  /// position matches the specified alignment.
+  ///
+  /// \param Align An integer specifying the desired alignment. This does not
+  /// need to be a power of two, but it must not be zero.
+  void alignTo(size_t Align);
+
+  /// Return the current offset within the file.
+  ///
+  /// \return The unsigned offset from the start of the file of the current
+  /// file position.
+  uint64_t tell();
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H
Index: include/llvm/DebugInfo/GSYM/Range.h
===================================================================
--- include/llvm/DebugInfo/GSYM/Range.h
+++ include/llvm/DebugInfo/GSYM/Range.h
@@ -21,10 +21,13 @@
 #define HEX64(v) llvm::format_hex(v, 18)
 
 namespace llvm {
+class DataExtractor;
 class raw_ostream;
 
 namespace gsym {
 
+class FileWriter;
+
 /// A class that represents an address range. The range is specified using
 /// a start and an end address.
 struct AddressRange {
@@ -47,6 +50,18 @@
   bool operator<(const AddressRange &R) const {
     return std::make_pair(Start, End) < std::make_pair(R.Start, R.End);
   }
+  /// AddressRange objects are encoded and decoded to be relative to a base
+  /// address. This will be the FunctionInfo's start address if the AddressRange
+  /// is directly contained in a FunctionInfo, or a base address of the
+  /// containing parent AddressRange or AddressRanges. This allows address
+  /// ranges to be efficiently encoded using ULEB128 encodings as we encode the
+  /// offset and size of each range instead of full addresses. This also makes
+  /// encoded addresses easy to relocate as we just need to relocate one base
+  /// address.
+  /// @{
+  void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
+  void encode(FileWriter &O, uint64_t BaseAddr) const;
+  /// @}
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
@@ -77,6 +92,14 @@
   }
   Collection::const_iterator begin() const { return Ranges.begin(); }
   Collection::const_iterator end() const { return Ranges.end(); }
+
+  /// Address ranges are decoded and encoded to be relative to a base address.
+  /// See the AddressRange comment for the encode and decode methods for full
+  /// details.
+  /// @{
+  void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
+  void encode(FileWriter &O, uint64_t BaseAddr) const;
+  /// @}
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);
Index: lib/DebugInfo/GSYM/CMakeLists.txt
===================================================================
--- lib/DebugInfo/GSYM/CMakeLists.txt
+++ lib/DebugInfo/GSYM/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_llvm_library(LLVMDebugInfoGSYM
+  FileWriter.cpp
   FunctionInfo.cpp
   InlineInfo.cpp
   Range.cpp
Index: lib/DebugInfo/GSYM/FileWriter.cpp
===================================================================
--- lib/DebugInfo/GSYM/FileWriter.cpp
+++ lib/DebugInfo/GSYM/FileWriter.cpp
@@ -0,0 +1,89 @@
+//===- FileWriter.cpp -------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+using namespace gsym;
+
+// Flush the stream when the writer goes away.
+FileWriter::~FileWriter() { OS.flush(); }
+
+void FileWriter::writeSLEB(int64_t S) {
+  // Encode into a scratch buffer and then emit only the bytes that were
+  // produced.
+  uint8_t Bytes[32];
+  auto Length = encodeSLEB128(S, Bytes);
+  assert(Length < sizeof(Bytes));
+  OS.write(reinterpret_cast<const char *>(Bytes), Length);
+}
+
+void FileWriter::writeULEB(uint64_t U) {
+  // Encode into a scratch buffer and then emit only the bytes that were
+  // produced.
+  uint8_t Bytes[32];
+  auto Length = encodeULEB128(U, Bytes);
+  assert(Length < sizeof(Bytes));
+  OS.write(reinterpret_cast<const char *>(Bytes), Length);
+}
+
+void FileWriter::writeU8(uint8_t U) {
+  OS.write(reinterpret_cast<const char *>(&U), sizeof(U));
+}
+
+void FileWriter::writeU16(uint16_t U) {
+  // Swap to the byte order chosen at construction before emitting raw bytes.
+  const uint16_t Swapped = support::endian::byte_swap(U, ByteOrder);
+  OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped));
+}
+
+void FileWriter::writeU32(uint32_t U) {
+  const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder);
+  OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped));
+}
+
+void FileWriter::writeU64(uint64_t U) {
+  const uint64_t Swapped = support::endian::byte_swap(U, ByteOrder);
+  OS.write(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped));
+}
+
+void FileWriter::fixup32(uint32_t U, uint64_t Offset) {
+  const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder);
+  // Overwrite the four bytes located at Offset.
+  OS.pwrite(reinterpret_cast<const char *>(&Swapped), sizeof(Swapped),
+            Offset);
+}
+
+void FileWriter::writeData(llvm::ArrayRef<uint8_t> Data) {
+  OS.write(reinterpret_cast<const char *>(Data.data()), Data.size());
+}
+
+void FileWriter::writeNullTerminated(llvm::StringRef Str) {
+  OS << Str << '\0';
+}
+
+uint64_t FileWriter::tell() {
+  return OS.tell();
+}
+
+void FileWriter::alignTo(size_t Align) {
+  // Nothing can be aligned to zero, and the modulo below would divide by it.
+  if (Align == 0)
+    return;
+  // Do the math in the unsigned 64 bit type that tell() returns. off_t is
+  // signed and is narrower than 64 bits on some hosts.
+  const uint64_t Offset = OS.tell();
+  const uint64_t Misalignment = Offset % Align;
+  if (Misalignment == 0)
+    return;
+  OS.write_zeros(Align - Misalignment);
+}
Index: 
lib/DebugInfo/GSYM/Range.cpp
===================================================================
--- lib/DebugInfo/GSYM/Range.cpp
+++ lib/DebugInfo/GSYM/Range.cpp
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
 #include
 #include
 
@@ -53,3 +55,44 @@
   }
   return OS;
 }
+
+// Write the range as two ULEB128 values: the distance of Start from BaseAddr
+// followed by the size of the range.
+void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const {
+  assert(Start >= BaseAddr);
+  const uint64_t RelativeStart = Start - BaseAddr;
+  O.writeULEB(RelativeStart);
+  O.writeULEB(size());
+}
+
+// Read the two ULEB128 values in the order encode() writes them and rebuild
+// the absolute range on top of BaseAddr.
+void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr,
+                          uint64_t &Offset) {
+  const uint64_t RelativeStart = Data.getULEB128(&Offset);
+  const uint64_t RangeSize = Data.getULEB128(&Offset);
+  const uint64_t AbsoluteStart = BaseAddr + RelativeStart;
+  Start = AbsoluteStart;
+  End = AbsoluteStart + RangeSize;
+}
+
+// Write the number of ranges as a ULEB128 and then each range in order. An
+// empty collection is written as just the zero count.
+void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const {
+  O.writeULEB(Ranges.size());
+  for (const auto &Entry : Ranges)
+    Entry.encode(O, BaseAddr);
+}
+
+// Call clear(), then read a ULEB128 count followed by that many encoded
+// ranges.
+void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr,
+                           uint64_t &Offset) {
+  clear();
+  const uint64_t Count = Data.getULEB128(&Offset);
+  if (Count != 0) {
+    Ranges.resize(Count);
+    for (auto &Entry : Ranges)
+      Entry.decode(Data, BaseAddr, Offset);
+  }
+}
Index: unittests/DebugInfo/GSYM/GSYMTest.cpp
===================================================================
--- unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -8,12 +8,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/DebugInfo/GSYM/StringTable.h"
-#include "llvm/Testing/Support/Error.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Endian.h"
 #include "gtest/gtest.h"
 #include
 
@@ -379,3 +382,124 @@
   // Test pointing to past end gets empty string.
   EXPECT_EQ(StrTab.getString(13), "");
 }
+
+// Write fixed width integers, LEB128 values and a C string using ByteOrder,
+// then verify that a DataExtractor reads the same values back.
+static void TestFileWriterHelper(llvm::support::endianness ByteOrder) {
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  const int64_t MinSLEB = INT64_MIN;
+  const int64_t MaxSLEB = INT64_MAX;
+  const uint64_t MinULEB = 0;
+  const uint64_t MaxULEB = UINT64_MAX;
+  const uint8_t U8 = 0x10;
+  const uint16_t U16 = 0x1122;
+  const uint32_t U32 = 0x12345678;
+  const uint64_t U64 = 0x33445566778899aa;
+  const char *Hello = "hello";
+  FW.writeU8(U8);
+  FW.writeU16(U16);
+  FW.writeU32(U32);
+  FW.writeU64(U64);
+  FW.alignTo(16);
+  const uint64_t FixupOffset = FW.tell();
+  // Placeholder that fixup32() overwrites with U32 once the rest is written.
+  FW.writeU32(0);
+  FW.writeSLEB(MinSLEB);
+  FW.writeSLEB(MaxSLEB);
+  FW.writeULEB(MinULEB);
+  FW.writeULEB(MaxULEB);
+  FW.writeNullTerminated(Hello);
+  // Test tell() and fixup32() by patching the placeholder written above.
+  FW.fixup32(U32, FixupOffset);
+
+  std::string Bytes(OutStrm.str());
+  uint8_t AddressSize = 4;
+  DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize);
+  uint64_t Offset = 0;
+  EXPECT_EQ(Data.getU8(&Offset), U8);
+  EXPECT_EQ(Data.getU16(&Offset), U16);
+  EXPECT_EQ(Data.getU32(&Offset), U32);
+  EXPECT_EQ(Data.getU64(&Offset), U64);
+  // Skip the padding that FW.alignTo(16) inserted.
+  Offset = alignTo(Offset, 16);
+  EXPECT_EQ(Data.getU32(&Offset), U32);
+  EXPECT_EQ(Data.getSLEB128(&Offset), MinSLEB);
+  EXPECT_EQ(Data.getSLEB128(&Offset), MaxSLEB);
+  EXPECT_EQ(Data.getULEB128(&Offset), MinULEB);
+  EXPECT_EQ(Data.getULEB128(&Offset), MaxULEB);
+  EXPECT_EQ(Data.getCStrRef(&Offset), StringRef(Hello));
+}
+
+TEST(GSYMTest, TestFileWriter) {
+  TestFileWriterHelper(llvm::support::little);
+  TestFileWriterHelper(llvm::support::big);
+}
+
+TEST(GSYMTest, TestAddressRangeEncodeDecode) {
+  // Test encoding and decoding AddressRange objects. AddressRange objects
+  // are always stored as offsets from a base address. The base address
+  // is the FunctionInfo's base address for function level ranges, and is
+  // the base address of the parent range for subranges.
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  const auto ByteOrder = llvm::support::endian::system_endianness();
+  FileWriter FW(OutStrm, ByteOrder);
+  const uint64_t BaseAddr = 0x1000;
+  const AddressRange Range1(0x1000, 0x1010);
+  const AddressRange Range2(0x1020, 0x1030);
+  Range1.encode(FW, BaseAddr);
+  Range2.encode(FW, BaseAddr);
+  std::string Bytes(OutStrm.str());
+  uint8_t AddressSize = 4;
+  DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize);
+
+  AddressRange DecodedRange1, DecodedRange2;
+  uint64_t Offset = 0;
+  DecodedRange1.decode(Data, BaseAddr, Offset);
+  DecodedRange2.decode(Data, BaseAddr, Offset);
+  EXPECT_EQ(Range1, DecodedRange1);
+  EXPECT_EQ(Range2, DecodedRange2);
+}
+
+// Encode Ranges relative to BaseAddr, decode the bytes again and expect the
+// decoded collection to compare equal to the input.
+static void TestAddressRangeEncodeDecodeHelper(const AddressRanges &Ranges,
+                                               const uint64_t BaseAddr) {
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  const auto ByteOrder = llvm::support::endian::system_endianness();
+  FileWriter FW(OutStrm, ByteOrder);
+  Ranges.encode(FW, BaseAddr);
+
+  std::string Bytes(OutStrm.str());
+  uint8_t AddressSize = 4;
+  DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize);
+
+  AddressRanges DecodedRanges;
+  uint64_t Offset = 0;
+  DecodedRanges.decode(Data, BaseAddr, Offset);
+  EXPECT_EQ(Ranges, DecodedRanges);
+}
+
+TEST(GSYMTest, TestAddressRangesEncodeDecode) {
+  // Test encoding and decoding AddressRanges. AddressRanges objects contain
+  // ranges that are stored as offsets from a base address. The base address
+  // is the FunctionInfo's base address for function level ranges, and is the
+  // base address of the parent range for subranges.
+  const uint64_t BaseAddr = 0x1000;
+
+  // Test encoding and decoding with no ranges.
+  AddressRanges Ranges;
+  TestAddressRangeEncodeDecodeHelper(Ranges, BaseAddr);
+
+  // Test encoding and decoding with 1 range.
+  Ranges.insert(AddressRange(0x1000, 0x1010));
+  TestAddressRangeEncodeDecodeHelper(Ranges, BaseAddr);
+
+  // Test encoding and decoding with multiple ranges.
+  Ranges.insert(AddressRange(0x1020, 0x1030));
+  Ranges.insert(AddressRange(0x1050, 0x1070));
+  TestAddressRangeEncodeDecodeHelper(Ranges, BaseAddr);
+}