Index: include/llvm/DebugInfo/GSYM/FileWriter.h =================================================================== --- include/llvm/DebugInfo/GSYM/FileWriter.h +++ include/llvm/DebugInfo/GSYM/FileWriter.h @@ -0,0 +1,57 @@ +//===- FileWriter.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H +#define LLVM_DEBUGINFO_GSYM_FILEWRITER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +#include +#include +#include +#include + +namespace llvm { +namespace gsym { + +/// A simplified binary data writer class that doesn't require targets, target +/// definitions, architectures, or require any other optional compile time +/// libraries to be enabled via the build process. This class needs the ability +/// to seek to different spots in the binary stream that it produces to fixup +/// offsets and sizes.
+class FileWriter { + std::ostream &OS; + llvm::support::endianness ByteOrder; +public: + FileWriter(std::ostream &S, llvm::support::endianness B) : OS(S), ByteOrder(B) {} + ~FileWriter(); + bool writeU8(uint8_t Value); + bool writeU16(uint16_t Value); + bool writeU32(uint32_t Value); + bool writeU64(uint64_t Value); + bool writeSLEB(int64_t Value); + bool writeULEB(uint64_t Value); + bool writeUnsigned(uint64_t Value, size_t ByteSize); + bool writeData(llvm::ArrayRef Data); + bool writeCStr(const char *CStr); + bool fixup32(uint32_t Value, off_t Offset); + bool alignTo(size_t Align); + off_t seek(off_t Offset); + off_t tell(); + +private: + FileWriter(const FileWriter &rhs) = delete; + void operator=(const FileWriter &rhs) = delete; +}; + +} // namespace gsym +} // namespace llvm + +#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEWRITER_H Index: include/llvm/DebugInfo/GSYM/Range.h =================================================================== --- include/llvm/DebugInfo/GSYM/Range.h +++ include/llvm/DebugInfo/GSYM/Range.h @@ -21,10 +21,13 @@ #define HEX64(v) llvm::format_hex(v, 18) namespace llvm { +class DataExtractor; class raw_ostream; namespace gsym { +class FileWriter; + /// A class that represents an address range. The range is specified using /// a start and an end address. class AddressRange { @@ -65,6 +68,16 @@ } return false; } + /// AddressRange objects are encoded and decoded to be relative to a base + /// address. This will be the FunctionInfo's start address if the AddressRange + /// is directly contained in a FunctionInfo, or a base address of the + /// containing parent AddressRange or AddressRanges. This allows address + /// ranges to be efficiently encoded using ULEB128 encodings as we encode the + /// offset and size of each range instead of full addresses. This also makes + /// encoded addresses easy to relocate as we just need to relocate one base + /// address. 
+ void decode(DataExtractor &Data, uint64_t BaseAddr, uint32_t &Offset); + void encode(FileWriter &O, uint64_t BaseAddr) const; }; inline bool operator==(const AddressRange &LHS, const AddressRange &RHS) { @@ -113,6 +126,12 @@ } Collection::const_iterator begin() const { return Ranges.begin(); } Collection::const_iterator end() const { return Ranges.end(); } + + /// Address ranges are decoded and encoded to be relative to a base address. + /// See the AddressRange comment for the encode and decode methods for full + /// details. + void decode(DataExtractor &Data, uint64_t BaseAddr, uint32_t &Offset); + void encode(FileWriter &O, uint64_t BaseAddr) const; }; raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR); Index: lib/DebugInfo/GSYM/CMakeLists.txt =================================================================== --- lib/DebugInfo/GSYM/CMakeLists.txt +++ lib/DebugInfo/GSYM/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMDebugInfoGSYM + FileWriter.cpp FunctionInfo.cpp InlineInfo.cpp Range.cpp Index: lib/DebugInfo/GSYM/FileWriter.cpp =================================================================== --- lib/DebugInfo/GSYM/FileWriter.cpp +++ lib/DebugInfo/GSYM/FileWriter.cpp @@ -0,0 +1,106 @@ +//===- FileWriter.cpp -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/LEB128.h" +#include + +using namespace llvm; +using namespace gsym; + +FileWriter::~FileWriter() { OS.flush(); } + +bool FileWriter::writeSLEB(int64_t S) { + uint8_t Bytes[32]; + auto Length = encodeSLEB128(S, Bytes); + assert(Length < sizeof(Bytes)); + return writeData(ArrayRef(Bytes, Length)); +} + +bool FileWriter::writeULEB(uint64_t U) { + uint8_t Bytes[32]; + auto Length = encodeULEB128(U, Bytes); + assert(Length < sizeof(Bytes)); + return writeData(ArrayRef(Bytes, Length)); +} + +bool FileWriter::writeU8(uint8_t U) { + return writeData(ArrayRef(&U, sizeof(U))); +} + +bool FileWriter::writeU16(uint16_t U) { + const uint16_t Swapped = support::endian::byte_swap(U, ByteOrder); + return writeData(ArrayRef((const uint8_t *)&Swapped, sizeof(Swapped))); +} + +bool FileWriter::writeU32(uint32_t U) { + const uint32_t Swapped = support::endian::byte_swap(U, ByteOrder); + return writeData(ArrayRef((const uint8_t *)&Swapped, sizeof(Swapped))); +} + +bool FileWriter::writeU64(uint64_t U) { + const uint64_t Swapped = support::endian::byte_swap(U, ByteOrder); + return writeData(ArrayRef((const uint8_t *)&Swapped, sizeof(Swapped))); +} + +bool FileWriter::fixup32(uint32_t U, off_t Offset) { + const off_t CurrOffset = tell(); + if (CurrOffset == -1) + return false; + if (seek(Offset) != Offset) + return false; + if (!writeU32(U)) + return false; + return seek(CurrOffset) == CurrOffset; +} + +bool FileWriter::writeUnsigned(uint64_t U, size_t ByteSize) { + switch (ByteSize) { + case 1: return writeU8(static_cast(U)); + case 2: return writeU16(static_cast(U)); + case 4: return writeU32(static_cast(U)); + case 8: return writeU64(U); + } + return false; +} + +bool FileWriter::writeData(llvm::ArrayRef Data) { + if (Data.empty()) + return false; + OS.write((const char *)Data.data(), Data.size()); + return OS.good(); +} + +bool 
FileWriter::writeCStr(const char *CStr) { + if (CStr == nullptr) + return false; + return writeData(ArrayRef((const uint8_t *)CStr, strlen(CStr) + 1)); +} + +off_t FileWriter::tell() { return OS.tellp(); } + +off_t FileWriter::seek(off_t Offset) { + OS.seekp(Offset); + return OS.good() ? Offset : -1; +} + +bool FileWriter::alignTo(size_t Align) { + off_t Offset = tell(); + assert(Offset != -1); + if (Offset == -1) + return false; + off_t AlignedOffset = (Offset + Align - 1) / Align * Align; + if (AlignedOffset == Offset) + return true; + off_t PadCount = AlignedOffset - Offset; + std::string PadBytes(PadCount, '\0'); + auto Success = writeData(ArrayRef((const uint8_t *)PadBytes.data(), PadBytes.size())); + assert(tell() == AlignedOffset); + return Success; +} Index: lib/DebugInfo/GSYM/Range.cpp =================================================================== --- lib/DebugInfo/GSYM/Range.cpp +++ lib/DebugInfo/GSYM/Range.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/GSYM/Range.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -69,3 +71,34 @@ return OS; } +void AddressRange::encode(FileWriter &O, uint64_t BaseAddr) const { + assert(startAddress() >= BaseAddr); + O.writeULEB(startAddress() - BaseAddr); + O.writeULEB(size()); +} + +void AddressRange::decode(DataExtractor &Data, uint64_t BaseAddr, uint32_t &Offset) { + const uint64_t AddrOffset = Data.getULEB128(&Offset); + const uint64_t Size = Data.getULEB128(&Offset); + const uint64_t StartAddr = BaseAddr + AddrOffset; + setStartAddress(StartAddr); + setEndAddress(StartAddr + Size); +} + +void AddressRanges::encode(FileWriter &O, uint64_t BaseAddr) const { + O.writeULEB(Ranges.size()); + if (Ranges.empty()) + return; + for (auto Range : Ranges) + Range.encode(O, BaseAddr); +} + +void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr, uint32_t &Offset) { + clear(); + 
uint64_t NumRanges = Data.getULEB128(&Offset); + if (NumRanges == 0) + return; + Ranges.resize(NumRanges); + for (auto &Range: Ranges) + Range.decode(Data, BaseAddr, Offset); +} Index: unittests/DebugInfo/GSYM/GSYMTest.cpp =================================================================== --- unittests/DebugInfo/GSYM/GSYMTest.cpp +++ unittests/DebugInfo/GSYM/GSYMTest.cpp @@ -9,10 +9,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/DebugInfo/GSYM/FileEntry.h" +#include "llvm/DebugInfo/GSYM/FileWriter.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" #include "llvm/DebugInfo/GSYM/Range.h" #include "llvm/DebugInfo/GSYM/StringTable.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Endian.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" @@ -388,3 +391,114 @@ // Test pointing to past end gets empty string. EXPECT_EQ(StrTab.getString(13), ""); } + +static void TestFileWriterHelper(llvm::support::endianness ByteOrder) { + std::stringstream OutStrm; + FileWriter FW(OutStrm, ByteOrder); + const int64_t MinSLEB = INT64_MIN; + const int64_t MaxSLEB = INT64_MAX; + const uint64_t MinULEB = 0; + const uint64_t MaxULEB = UINT64_MAX; + const uint8_t U8 = 0x10; + const uint16_t U16 = 0x1122; + const uint32_t U32 = 0x12345678; + const uint64_t U64 = 0x33445566778899aa; + const char *Hello = "hello"; + FW.writeU8(U8); + FW.writeU16(U16); + FW.writeU32(U32); + FW.writeU64(U64); + FW.alignTo(16); + const off_t FixupOffset = FW.tell(); + FW.writeU32(0); + FW.writeSLEB(MinSLEB); + FW.writeSLEB(MaxSLEB); + FW.writeULEB(MinULEB); + FW.writeULEB(MaxULEB); + FW.writeCStr(Hello); + // Test Seek, Tell using Fixup32. 
+ FW.fixup32(U32, FixupOffset); + + std::string Bytes(OutStrm.str()); + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + uint32_t Offset = 0; + EXPECT_EQ(Data.getU8(&Offset), U8); + EXPECT_EQ(Data.getU16(&Offset), U16); + EXPECT_EQ(Data.getU32(&Offset), U32); + EXPECT_EQ(Data.getU64(&Offset), U64); + Offset = alignTo(Offset, 16); + EXPECT_EQ(Data.getU32(&Offset), U32); + EXPECT_EQ(Data.getSLEB128(&Offset), MinSLEB); + EXPECT_EQ(Data.getSLEB128(&Offset), MaxSLEB); + EXPECT_EQ(Data.getULEB128(&Offset), MinULEB); + EXPECT_EQ(Data.getULEB128(&Offset), MaxULEB); + EXPECT_EQ(Data.getCStrRef(&Offset), StringRef(Hello)); +} + +TEST(GSYMTest, TestFileWriter) { + TestFileWriterHelper(llvm::support::little); + TestFileWriterHelper(llvm::support::big); +} + +TEST(GSYMTest, TestAddressRangeEncodeDecode) { + // Test encoding and decoding AddressRange objects. AddressRange objects + // are always stored as offsets from a base address. The base address + // is the FunctionInfo's base address for function level ranges, and is + // the base address of the parent range for subranges.
+ std::stringstream OutStrm; + const auto ByteOrder = llvm::support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + const uint64_t BaseAddr = 0x1000; + const AddressRange Range1(0x1000, 0x1010); + const AddressRange Range2(0x1020, 0x1030); + Range1.encode(FW, BaseAddr); + Range2.encode(FW, BaseAddr); + + std::string Bytes(OutStrm.str()); + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + + AddressRange DecodedRange1, DecodedRange2; + uint32_t Offset = 0; + DecodedRange1.decode(Data, BaseAddr, Offset); + DecodedRange2.decode(Data, BaseAddr, Offset); + EXPECT_EQ(Range1, DecodedRange1); + EXPECT_EQ(Range2, DecodedRange2); +} + +static void TestAddressRangeEncodeDecodeHelper(const AddressRanges &Ranges, const uint64_t BaseAddr) { + std::stringstream OutStrm; + const auto ByteOrder = llvm::support::endian::system_endianness(); + FileWriter FW(OutStrm, ByteOrder); + Ranges.encode(FW, BaseAddr); + std::string Bytes(OutStrm.str()); + uint8_t AddressSize = 4; + DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize); + + AddressRanges DecodedRanges; + uint32_t Offset = 0; + DecodedRanges.decode(Data, BaseAddr, Offset); + EXPECT_EQ(Ranges, DecodedRanges); +} + +TEST(GSYMTest, TestAddressRangesEncodeDecode) { + // Test encoding and decoding AddressRanges. AddressRanges objects contain + // ranges that are stored as offsets from a base address. The base address + // is the FunctionInfo's base address for function level ranges, and is the + // base address of the parent range for subranges. + const uint64_t BaseAddr = 0x1000; + + // Test encoding and decoding with no ranges. + AddressRanges Ranges; + TestAddressRangeEncodeDecodeHelper(Ranges, BaseAddr); + + // Test encoding and decoding with 1 range. + Ranges.insert(AddressRange(0x1000, 0x1010)); + TestAddressRangeEncodeDecodeHelper(Ranges, BaseAddr); + + // Test encoding and decoding with multiple ranges.
+ Ranges.insert(AddressRange(0x1020, 0x1030)); + Ranges.insert(AddressRange(0x1050, 0x1070)); + TestAddressRangeEncodeDecodeHelper(Ranges, BaseAddr); +}