Index: include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -10,20 +10,36 @@ #ifndef LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Dwarf.h" +#include namespace llvm { +class DWARFUnit; +class DWARFFormValue; class raw_ostream; class DWARFAbbreviationDeclaration { public: struct AttributeSpec { - AttributeSpec(dwarf::Attribute A, dwarf::Form F) : Attr(A), Form(F) {} + AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional S) + : Attr(A), Form(F), ByteSize(S) {} dwarf::Attribute Attr; dwarf::Form Form; + // If ByteSize has a value, then it contains the fixed size in bytes for the + // Form in this object. If ByteSize doesn't have a value, then the byte size + // of Form either varies according to the DWARFUnit that it is contained in + // or the value size varies and must be decoded from the debug information + // in order to determine its size. + Optional ByteSize; + // Get the fixed byte size of this Form if possible. This function might use + // the DWARFUnit to calculate the size of the Form, like for DW_FORM_addr + // and DW_FORM_ref_addr, so this isn't just an accessor for the ByteSize + // member. + Optional getByteSize(const DWARFUnit *U) const; }; typedef SmallVector AttributeSpecVector; @@ -47,17 +63,47 @@ } uint32_t findAttributeIndex(dwarf::Attribute attr) const; + // Efficiently extract an attribute value for a DWARFUnit given the DIE offset + // and the attribute. Returns true if the attribute was successfully extracted + // into FormValue. 
+ bool getAttributeValue(const uint32_t DIEOffset, const dwarf::Attribute Attr, + const DWARFUnit *U, DWARFFormValue &FormValue) const; bool extract(DataExtractor Data, uint32_t* OffsetPtr); void dump(raw_ostream &OS) const; + // Return an optional byte size of all attribute data in this abbreviation + // if a constant size can be calculated given a DWARFUnit. This allows DWARF + // parsing to be faster as many DWARF DIEs have a fixed byte size. + Optional getFixedAttributesByteSize(const DWARFUnit *U) const; + private: void clear(); + // A helper structure that can quickly determine the size in bytes of an + // abbreviation declaration. + struct FixedSizeInfo { + // The fixed byte size for fixed size forms. + uint16_t NumBytes; + // Number of DW_FORM_addr forms in this abbreviation declaration. + uint8_t NumAddrs; + // Number of DW_FORM_ref_addr forms in this abbreviation declaration. + uint8_t NumRefAddrs; + // Constructor + FixedSizeInfo() : NumBytes(0), NumAddrs(0), NumRefAddrs(0) {} + // Calculate the fixed size in bytes given a DWARFUnit. + size_t getByteSize(const DWARFUnit *U) const; + }; + uint32_t Code; dwarf::Tag Tag; + uint8_t CodeByteSize; bool HasChildren; - AttributeSpecVector AttributeSpecs; + // Keep a map of attributes to attribute index for quick attribute lookups. + std::map AttributeMap; + // If this abbreviation has a fixed byte size then \a FixedAttributeSize + // will have a value. 
+ Optional FixedAttributeSize; }; } Index: include/llvm/DebugInfo/DWARF/DWARFContext.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFContext.h +++ include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -284,6 +284,9 @@ public: DWARFContextInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr); + DWARFContextInMemory(bool IsLittleEndian, uint8_t AddrSize, + StringRef DebugAbbrev, StringRef DebugInfo, + StringRef DebugStr); bool isLittleEndian() const override { return IsLittleEndian; } uint8_t getAddressSize() const override { return AddressSize; } const DWARFSection &getInfoSection() override { return InfoSection; } Index: include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -48,6 +48,10 @@ /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr); + /// High performance DWARFDebugInfoEntry should use this call + bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr, + const DataExtractor &DebugInfoData, + const uint32_t UEndOffset); uint32_t getTag() const { return AbbrevDecl ? 
AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == nullptr; } @@ -98,6 +102,10 @@ dwarf::Attribute Attr, uint64_t FailValue) const; + int64_t getAttributeValueAsSignedConstant(const DWARFUnit *U, + dwarf::Attribute Attr, + int64_t FailValue) const; + uint64_t getAttributeValueAsUnsignedConstant(const DWARFUnit *U, dwarf::Attribute Attr, uint64_t FailValue) const; Index: include/llvm/DebugInfo/DWARF/DWARFFormValue.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -56,6 +56,7 @@ public: DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F), U(nullptr) {} dwarf::Form getForm() const { return Form; } + void setForm(dwarf::Form F) { Form = F; } bool isFormClass(FormClass FC) const; const DWARFUnit *getUnit() const { return U; } void dump(raw_ostream &OS) const; @@ -81,7 +82,21 @@ Optional getAsAddress() const; Optional getAsSectionOffset() const; Optional> getAsBlock() const; - + // Get the fixed byte size for a given form. If the form always has a fixed + // byte size that doesn't depend on a DWARFUnit, then an Optional with a valid + // value will be returned. If the form can vary in size depending on the + // DWARFUnit (DWARF version, address byte size, or DWARF32/DWARF64) and the + // DWARFUnit is valid, then an Optional with a valid value is returned. If the + // form is always encoded using a variable length storage format (ULEB/SLEB + // numbers or blocks) or the size depends on a DWARFUnit and the DWARFUnit is + // NULL, then an Optional with no value will be returned. 
+ static Optional getFixedByteSize(dwarf::Form Form, uint16_t Version, + uint8_t AddrSize); bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *U) const; static bool skipValue(dwarf::Form form, DataExtractor debug_info_data, @@ -89,9 +104,6 @@ static bool skipValue(dwarf::Form form, DataExtractor debug_info_data, uint32_t *offset_ptr, uint16_t Version, uint8_t AddrSize); - - static ArrayRef getFixedFormSizes(uint8_t AddrSize, - uint16_t Version); private: void dumpString(raw_ostream &OS) const; }; Index: lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -17,8 +19,11 @@ void DWARFAbbreviationDeclaration::clear() { Code = 0; Tag = DW_TAG_null; + CodeByteSize = 0; HasChildren = false; AttributeSpecs.clear(); + AttributeMap.clear(); + FixedAttributeSize.reset(); } DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() { @@ -29,10 +34,12 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint32_t* OffsetPtr) { clear(); + const uint32_t Offset = *OffsetPtr; Code = Data.getULEB128(OffsetPtr); if (Code == 0) { return false; } + CodeByteSize = *OffsetPtr - Offset; Tag = static_cast(Data.getULEB128(OffsetPtr)); if (Tag == DW_TAG_null) { clear(); @@ -40,12 +47,25 @@ } uint8_t ChildrenByte = Data.getU8(OffsetPtr); HasChildren = (ChildrenByte == DW_CHILDREN_yes); + FixedSizeInfo FSI; + bool HasFixedByteSize = true; while (true) { auto A = static_cast(Data.getULEB128(OffsetPtr)); auto F = 
static_cast
(Data.getULEB128(OffsetPtr)); if (A && F) { - AttributeSpecs.push_back(AttributeSpec(A, F)); + auto FixedFormByteSize = DWARFFormValue::getFixedByteSize(F); + if (HasFixedByteSize) { + if (FixedFormByteSize.hasValue()) + FSI.NumBytes += FixedFormByteSize.getValue(); + else if (F == DW_FORM_addr) + ++FSI.NumAddrs; + else if (F == DW_FORM_ref_addr) + ++FSI.NumRefAddrs; + else + HasFixedByteSize = false; + } + AttributeSpecs.push_back(AttributeSpec(A, F, FixedFormByteSize)); } else if (A == 0 && F == 0) { // We successfully reached the end of this abbreviation declaration // since both attribute and form are zero. @@ -59,6 +79,16 @@ return false; } } + // If this entire abbreviation has a fixed byte size, remember this so we + // can parse DWARF faster. + if (HasFixedByteSize) + FixedAttributeSize = FSI; + + // Make an attribute to index lookup table to make findAttributeIndex fast + uint32_t AttrIndex = 0; + for (const AttributeSpec &Spec : AttributeSpecs) { + AttributeMap[Spec.Attr] = AttrIndex++; + } return true; } @@ -89,10 +119,66 @@ } uint32_t -DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute attr) const { - for (uint32_t i = 0, e = AttributeSpecs.size(); i != e; ++i) { - if (AttributeSpecs[i].Attr == attr) - return i; - } +DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const { + const auto Iter = AttributeMap.find(Attr); + if (Iter != AttributeMap.end()) + return Iter->second; return -1U; } + +bool DWARFAbbreviationDeclaration::getAttributeValue( + const uint32_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit *U, + DWARFFormValue &FormValue) const { + const uint32_t MatchAttrIndex = findAttributeIndex(Attr); + if (MatchAttrIndex >= AttributeSpecs.size()) + return false; + + DataExtractor DebugInfoData = U->getDebugInfoExtractor(); + + // Add the byte size of the ULEB128 abbrev Code so we can start skipping + // the attribute data + uint32_t Offset = DIEOffset + CodeByteSize; + uint32_t AttrIndex = 0; + 
for (const auto &Spec : AttributeSpecs) { + if (MatchAttrIndex == AttrIndex) { + // We have arrived at the attribute to extract, extract it from Offset + FormValue.setForm(Spec.Form); + return FormValue.extractValue(DebugInfoData, &Offset, U); + } + // March Offset along until we get to the attribute we want. + Optional AttrSize = Spec.getByteSize(U); + if (AttrSize.hasValue()) + Offset += AttrSize.getValue(); + else + DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, U); + ++AttrIndex; + } + return false; +} + +size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize( + const DWARFUnit *U) const { + assert(U); + size_t ByteSize = NumBytes; + if (NumAddrs > 0) + ByteSize += NumAddrs * U->getAddressByteSize(); + if (NumRefAddrs > 0) + ByteSize += NumRefAddrs * U->getRefAddrByteSize(); + return ByteSize; +} + +// Get the fixed byte size of this Form if the Form is fixed in byte size +Optional DWARFAbbreviationDeclaration::AttributeSpec::getByteSize( + const DWARFUnit *U) const { + if (ByteSize.hasValue()) + return ByteSize; + assert(U); + return DWARFFormValue::getFixedByteSize(Form, U); +} + +Optional DWARFAbbreviationDeclaration::getFixedAttributesByteSize( + const DWARFUnit *U) const { + if (FixedAttributeSize.hasValue()) + return FixedAttributeSize->getByteSize(U); + return None; +} Index: lib/DebugInfo/DWARF/DWARFContext.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFContext.cpp +++ lib/DebugInfo/DWARF/DWARFContext.cpp @@ -901,4 +901,13 @@ } } +DWARFContextInMemory::DWARFContextInMemory(bool LittleEndian, uint8_t AddrSize, + StringRef DebugAbbrev, + StringRef DebugInfo, + StringRef DebugStr) + : IsLittleEndian(LittleEndian), AddressSize(AddrSize), + AbbrevSection(DebugAbbrev), StringSection(DebugStr) { + InfoSection.Data = DebugInfo; +} + void DWARFContextInMemory::anchor() { } Index: lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp 
=================================================================== --- lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp +++ lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp @@ -185,9 +185,15 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U, uint32_t *OffsetPtr) { + DataExtractor DebugInfoData = U->getDebugInfoExtractor(); + const uint32_t UEndOffset = U->getNextUnitOffset(); + return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset); +} +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U, + uint32_t *OffsetPtr, + const DataExtractor &DebugInfoData, + const uint32_t UEndOffset) { Offset = *OffsetPtr; - DataExtractor DebugInfoData = U->getDebugInfoExtractor(); - uint32_t UEndOffset = U->getNextUnitOffset(); if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset)) return false; uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); @@ -202,19 +208,20 @@ *OffsetPtr = Offset; return false; } - ArrayRef FixedFormSizes = DWARFFormValue::getFixedFormSizes( - U->getAddressByteSize(), U->getVersion()); - assert(FixedFormSizes.size() > 0); - + // See if this DIE has attributes that all have fixed byte sizes. If so, we + // can just add these to the offset to get to the next DIE + Optional FixedDIESize = AbbrevDecl->getFixedAttributesByteSize(U); + if (FixedDIESize.hasValue()) { + *OffsetPtr += FixedDIESize.getValue(); + return true; + } // Skip all data in the .debug_info for the attributes for (const auto &AttrSpec : AbbrevDecl->attributes()) { - auto Form = AttrSpec.Form; - - uint8_t FixedFormSize = - (Form < FixedFormSizes.size()) ? FixedFormSizes[Form] : 0; - if (FixedFormSize) - *OffsetPtr += FixedFormSize; - else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) { + Optional FixedFormSize = AttrSpec.getByteSize(U); + if (FixedFormSize.hasValue()) { + *OffsetPtr += FixedFormSize.getValue(); + } else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData, + OffsetPtr, U)) { // Restore the original offset. 
*OffsetPtr = Offset; return false; @@ -235,27 +242,9 @@ bool DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFUnit *U, dwarf::Attribute Attr, DWARFFormValue &FormValue) const { - if (!AbbrevDecl) + if (!AbbrevDecl || !U) return false; - - uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr); - if (AttrIdx == -1U) - return false; - - DataExtractor DebugInfoData = U->getDebugInfoExtractor(); - uint32_t DebugInfoOffset = getOffset(); - - // Skip the abbreviation code so we are at the data for the attributes - DebugInfoData.getULEB128(&DebugInfoOffset); - - // Skip preceding attribute values. - for (uint32_t i = 0; i < AttrIdx; ++i) { - DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i), - DebugInfoData, &DebugInfoOffset, U); - } - - FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx)); - return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U); + return AbbrevDecl->getAttributeValue(Offset, Attr, U, FormValue); } const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString( @@ -288,6 +277,15 @@ return Result.hasValue() ? Result.getValue() : FailValue; } +int64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSignedConstant( + const DWARFUnit *U, dwarf::Attribute Attr, int64_t FailValue) const { + DWARFFormValue FormValue; + if (!getAttributeValue(U, Attr, FormValue)) + return FailValue; + Optional Result = FormValue.getAsSignedConstant(); + return Result.hasValue() ? Result.getValue() : FailValue; +} + uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( const DWARFUnit *U, dwarf::Attribute Attr, uint64_t FailValue) const { Index: lib/DebugInfo/DWARF/DWARFFormValue.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -29,54 +29,8 @@ return (Version == 2) ? 
AddrSize : 4; } -template -ArrayRef makeFixedFormSizesArrayRef() { - static const uint8_t sizes[] = { - 0, // 0x00 unused - AddrSize, // 0x01 DW_FORM_addr - 0, // 0x02 unused - 0, // 0x03 DW_FORM_block2 - 0, // 0x04 DW_FORM_block4 - 2, // 0x05 DW_FORM_data2 - 4, // 0x06 DW_FORM_data4 - 8, // 0x07 DW_FORM_data8 - 0, // 0x08 DW_FORM_string - 0, // 0x09 DW_FORM_block - 0, // 0x0a DW_FORM_block1 - 1, // 0x0b DW_FORM_data1 - 1, // 0x0c DW_FORM_flag - 0, // 0x0d DW_FORM_sdata - 4, // 0x0e DW_FORM_strp - 0, // 0x0f DW_FORM_udata - RefAddrSize, // 0x10 DW_FORM_ref_addr - 1, // 0x11 DW_FORM_ref1 - 2, // 0x12 DW_FORM_ref2 - 4, // 0x13 DW_FORM_ref4 - 8, // 0x14 DW_FORM_ref8 - 0, // 0x15 DW_FORM_ref_udata - 0, // 0x16 DW_FORM_indirect - 4, // 0x17 DW_FORM_sec_offset - 0, // 0x18 DW_FORM_exprloc - 0, // 0x19 DW_FORM_flag_present - }; - return makeArrayRef(sizes); } -} -ArrayRef DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, - uint16_t Version) { - uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version); - if (AddrSize == 4 && RefAddrSize == 4) - return makeFixedFormSizesArrayRef<4, 4>(); - if (AddrSize == 4 && RefAddrSize == 8) - return makeFixedFormSizesArrayRef<4, 8>(); - if (AddrSize == 8 && RefAddrSize == 4) - return makeFixedFormSizesArrayRef<8, 4>(); - if (AddrSize == 8 && RefAddrSize == 8) - return makeFixedFormSizesArrayRef<8, 8>(); - return None; -} - static const DWARFFormValue::FormClass DWARF4FormClasses[] = { DWARFFormValue::FC_Unknown, // 0x0 DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr @@ -108,6 +62,163 @@ DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present }; +Optional DWARFFormValue::getFixedByteSize(dwarf::Form form, + const DWARFUnit *U) { + switch (form) { + case DW_FORM_addr: + if (U) + return U->getAddressByteSize(); + break; + + case DW_FORM_block: // ULEB128 length L followed by L bytes + case DW_FORM_block1: // 1 byte length L followed by L bytes + case DW_FORM_block2: // 2 byte length L followed by L bytes + case DW_FORM_block4: // 4 byte 
length L followed by L bytes + case DW_FORM_string: // C-string with null terminator + case DW_FORM_sdata: // SLEB128 + case DW_FORM_udata: // ULEB128 + case DW_FORM_ref_udata: // ULEB128 + case DW_FORM_indirect: // ULEB128 + case DW_FORM_exprloc: // ULEB128 length L followed by L bytes + case DW_FORM_strx: // ULEB128 + case DW_FORM_addrx: // ULEB128 + case DW_FORM_loclistx: // ULEB128 + case DW_FORM_rnglistx: // ULEB128 + case DW_FORM_GNU_addr_index: // ULEB128 + case DW_FORM_GNU_str_index: // ULEB128 + break; + + case DW_FORM_ref_addr: + if (U) + return U->getRefAddrByteSize(); + break; + + case DW_FORM_flag: + case DW_FORM_data1: + case DW_FORM_ref1: + return 1; + + case DW_FORM_data2: + case DW_FORM_ref2: + return 2; + + case DW_FORM_data4: + case DW_FORM_ref4: + case DW_FORM_strp: + return 4; + + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + case DW_FORM_ref_sup: + // 4 bytes in DWARF32, 8 in DWARF64 + return 4; // FIXME: This DWARF parser currently only handles DWARF32. + + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + return 8; + + case DW_FORM_flag_present: + return 0; + break; + + case DW_FORM_data16: + return 16; + + case DW_FORM_implicit_const: + // The implicit value is stored in the abbreviation as a ULEB128, and + // there is no data in debug info. 
+ return 0; + + default: + assert(!"Handle this form in this switch statement"); + break; + } + return None; +} + +Optional DWARFFormValue::getFixedByteSize(dwarf::Form form, + uint16_t Version, + uint8_t AddrSize) { + switch (form) { + case DW_FORM_addr: + return AddrSize; + + case DW_FORM_block: // ULEB128 length L followed by L bytes + case DW_FORM_block1: // 1 byte length L followed by L bytes + case DW_FORM_block2: // 2 byte length L followed by L bytes + case DW_FORM_block4: // 4 byte length L followed by L bytes + case DW_FORM_string: // C-string with null terminator + case DW_FORM_sdata: // SLEB128 + case DW_FORM_udata: // ULEB128 + case DW_FORM_ref_udata: // ULEB128 + case DW_FORM_indirect: // ULEB128 + case DW_FORM_exprloc: // ULEB128 length L followed by L bytes + case DW_FORM_strx: // ULEB128 + case DW_FORM_addrx: // ULEB128 + case DW_FORM_loclistx: // ULEB128 + case DW_FORM_rnglistx: // ULEB128 + case DW_FORM_GNU_addr_index: // ULEB128 + case DW_FORM_GNU_str_index: // ULEB128 + break; + + case DW_FORM_ref_addr: + return (Version == 2) ? AddrSize : 4; + + case DW_FORM_flag: + case DW_FORM_data1: + case DW_FORM_ref1: + return 1; + + case DW_FORM_data2: + case DW_FORM_ref2: + return 2; + + case DW_FORM_data4: + case DW_FORM_ref4: + case DW_FORM_strp: + return 4; + + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + case DW_FORM_ref_sup: + // 4 bytes in DWARF32, 8 in DWARF64 + return 4; // FIXME: This DWARF parser currently only handles DWARF32. + + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + return 8; + + case DW_FORM_flag_present: + return 0; + break; + + // TODO: DWARF 5 spec doesn't specify how these are encoded as of + // 10/31/2016. + break; + + case DW_FORM_data16: + return 16; + + case DW_FORM_implicit_const: + // The implicit value is stored in the abbreviation as a ULEB128, and + // there is no data in debug info. 
+ return 0; + + default: + assert(!"Handle this form in this switch statement"); + break; + } + return None; +} + bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { // First, check DWARF4 form classes. if (Form < makeArrayRef(DWARF4FormClasses).size() && @@ -256,22 +367,9 @@ return true; } -bool -DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr, - const DWARFUnit *U) const { - return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, U); -} - -bool -DWARFFormValue::skipValue(dwarf::Form form, DataExtractor debug_info_data, - uint32_t *offset_ptr, const DWARFUnit *cu) { - return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(), - cu->getAddressByteSize()); -} -bool -DWARFFormValue::skipValue(dwarf::Form form, DataExtractor debug_info_data, - uint32_t *offset_ptr, uint16_t Version, - uint8_t AddrSize) { +bool skipVariableLengthValue(dwarf::Form form, + const DataExtractor &debug_info_data, + uint32_t *offset_ptr, const DWARFUnit *U) { bool indirect = false; do { switch (form) { @@ -304,51 +402,48 @@ debug_info_data.getCStr(offset_ptr); return true; - // Compile unit address sized values case DW_FORM_addr: - *offset_ptr += AddrSize; - return true; case DW_FORM_ref_addr: - *offset_ptr += getRefAddrSize(AddrSize, Version); - return true; - - // 0 byte values - implied from the form. 
case DW_FORM_flag_present: - return true; - - // 1 byte values case DW_FORM_data1: case DW_FORM_flag: case DW_FORM_ref1: - *offset_ptr += 1; - return true; - - // 2 byte values case DW_FORM_data2: case DW_FORM_ref2: - *offset_ptr += 2; - return true; - - // 4 byte values case DW_FORM_data4: case DW_FORM_ref4: - *offset_ptr += 4; - return true; - - // 8 byte values case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sig8: - *offset_ptr += 8; - return true; + case DW_FORM_sec_offset: + case DW_FORM_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + if (indirect) { + Optional FixedByteSize = + DWARFFormValue::getFixedByteSize(form, U); + if (FixedByteSize.hasValue()) { + *offset_ptr += FixedByteSize.getValue(); + return true; + } + } else { + assert(!"only variable length Form can be passed to this function"); + } + return false; // signed or unsigned LEB 128 values - // case DW_FORM_APPLE_db_str: case DW_FORM_sdata: + debug_info_data.getSLEB128(offset_ptr); + return true; + case DW_FORM_udata: case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + case DW_FORM_GNU_addr_index: case DW_FORM_GNU_str_index: - case DW_FORM_GNU_addr_index: debug_info_data.getULEB128(offset_ptr); return true; @@ -357,14 +452,6 @@ form = static_cast(debug_info_data.getULEB128(offset_ptr)); break; - // FIXME: 4 for DWARF32, 8 for DWARF64. 
- case DW_FORM_sec_offset: - case DW_FORM_strp: - case DW_FORM_GNU_ref_alt: - case DW_FORM_GNU_strp_alt: - *offset_ptr += 4; - return true; - default: return false; } @@ -372,6 +459,38 @@ return true; } +bool DWARFFormValue::skipValue(DataExtractor debug_info_data, + uint32_t *offset_ptr, const DWARFUnit *U) const { + return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, U); +} + +bool DWARFFormValue::skipValue(dwarf::Form form, DataExtractor debug_info_data, + uint32_t *offset_ptr, const DWARFUnit *U) { + Optional FixedByteSize = getFixedByteSize(form, U); + if (FixedByteSize.hasValue()) { + *offset_ptr += FixedByteSize.getValue(); + return true; + } + return skipVariableLengthValue(form, debug_info_data, offset_ptr, U); +} + +bool DWARFFormValue::skipValue(dwarf::Form form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize) { + Optional FixedByteSize = getFixedByteSize(form, nullptr); + if (FixedByteSize.hasValue()) { + *offset_ptr += FixedByteSize.getValue(); + return true; + } else if (form == DW_FORM_addr) { + *offset_ptr += AddrSize; + return true; + } else if (form == DW_FORM_ref_addr) { + *offset_ptr += getRefAddrSize(AddrSize, Version); + return true; + } + return skipVariableLengthValue(form, debug_info_data, offset_ptr, nullptr); +} + void DWARFFormValue::dump(raw_ostream &OS) const { uint64_t uvalue = Value.uval; @@ -556,8 +675,9 @@ return None; return Value.uval + U->getOffset(); case DW_FORM_ref_addr: + case DW_FORM_ref_sig8: + case DW_FORM_GNU_ref_alt: return Value.uval; - // FIXME: Add proper support for DW_FORM_ref_sig8 and DW_FORM_GNU_ref_alt. 
default: return None; } Index: lib/DebugInfo/DWARF/DWARFUnit.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFUnit.cpp +++ lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -196,10 +196,11 @@ uint32_t DIEOffset = Offset + getHeaderSize(); uint32_t NextCUOffset = getNextUnitOffset(); DWARFDebugInfoEntryMinimal DIE; + DataExtractor DebugInfoData = getDebugInfoExtractor(); uint32_t Depth = 0; bool IsCUDie = true; - while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) { + while (DIE.extractFast(this, &DIEOffset, DebugInfoData, NextCUOffset)) { if (IsCUDie) { if (AppendCUDie) Dies.push_back(DIE); Index: unittests/DebugInfo/DWARF/CMakeLists.txt =================================================================== --- unittests/DebugInfo/DWARF/CMakeLists.txt +++ unittests/DebugInfo/DWARF/CMakeLists.txt @@ -3,6 +3,8 @@ ) set(DebugInfoSources + DWARFGenerator.cpp + DWARFDebugInfoTest.cpp DWARFFormValueTest.cpp ) Index: unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp =================================================================== --- unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -0,0 +1,533 @@ +//===- llvm/unittest/DebugInfo/DWARFDebugInfoTest.cpp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFGenerator.h" +#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFUnit.h" +#include "llvm/Support/Dwarf.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; +using namespace dwarf; +using namespace dwarf_gen; + +namespace { + +inline bool HostIsLittleEndian() { + union { + uint32_t i; + char c[4]; + } e = {0x00000001}; + + return e.c[0]; +} + +TEST(DWARFGenerator, TestStringTable) { + // Make sure that String table reserves offset zero for the empty string and + // that two different strings get appropriate offsets + StringTable Strings; + const char *Str1 = "hello"; + const char *Str2 = "world"; + const char *Str3 = ""; + const size_t Str1Strp = Strings.addString(Str1); + const size_t Str2Strp = Strings.addString(Str2); + const size_t Str3Strp = Strings.addString(Str3); + EXPECT_EQ(Str1Strp, (size_t)1); + EXPECT_EQ(Str2Strp, 1u + strlen(Str1) + 1u); + EXPECT_EQ(Str3Strp, (size_t)0); +} + +template +void TestChildren() { + // Test that the parent/child/sibling relationships between DIEs are + // decoded correctly for the DWARF version and address byte size given by + // the template arguments. 
+ const uint8_t AddrSize = sizeof(AddrType); + const bool LittleEndian = HostIsLittleEndian(); + + DWARFGenerator Dwarf; + CompileUnit &CU = Dwarf.appendCompileUnit(Version, AddrSize); + CU.Die.appendAttribute({DW_AT_name, DW_FORM_strp, "/tmp/main.c"}); + CU.Die.appendAttribute({DW_AT_language, DW_FORM_data2, DW_LANG_C}); + + DIE &SubprogramDie = CU.Die.appendChild(DW_TAG_subprogram); + SubprogramDie.appendAttribute({DW_AT_name, DW_FORM_strp, "main"}); + SubprogramDie.appendAttribute({DW_AT_low_pc, DW_FORM_addr, 0x1000U}); + SubprogramDie.appendAttribute({DW_AT_high_pc, DW_FORM_addr, 0x2000U}); + + DIE &IntDie = CU.Die.appendChild(DW_TAG_base_type); + IntDie.appendAttribute({DW_AT_name, DW_FORM_strp, "int"}); + IntDie.appendAttribute({DW_AT_encoding, DW_FORM_data1, DW_ATE_signed}); + IntDie.appendAttribute({DW_AT_byte_size, DW_FORM_data1, 4}); + + DIE &ArgcDie = SubprogramDie.appendChild(DW_TAG_formal_parameter); + ArgcDie.appendAttribute({DW_AT_name, DW_FORM_strp, "argc"}); + ArgcDie.appendAttribute({DW_AT_type, DW_FORM_ref4, &IntDie}); + + DWARFSections DwarfSections; + Dwarf.generate(DwarfSections); + + // DwarfSections.dump(llvm::outs()); + + DWARFContextInMemory dwarfContext( + LittleEndian, AddrSize, DwarfSections.getDebugAbbrevData(), + DwarfSections.getDebugInfoData(), DwarfSections.getDebugStrData()); + uint32_t NumCUs = dwarfContext.getNumCompileUnits(); + EXPECT_EQ(NumCUs, 1u); + DWARFCompileUnit *U = dwarfContext.getCompileUnitAtIndex(0); + auto DiePtr = U->getUnitDIE(false); + EXPECT_TRUE(DiePtr != nullptr); + //DiePtr->dump(llvm::outs(), U, UINT32_MAX); + auto SubprogramDiePtr = DiePtr->getFirstChild(); + EXPECT_TRUE(SubprogramDiePtr != nullptr); + EXPECT_EQ(SubprogramDiePtr->getTag(), DW_TAG_subprogram); + + auto ArgcDiePtr = SubprogramDiePtr->getFirstChild(); + EXPECT_TRUE(ArgcDiePtr != nullptr); + EXPECT_EQ(ArgcDiePtr->getTag(), DW_TAG_formal_parameter); + + auto NullDiePtr = ArgcDiePtr->getSibling(); + EXPECT_TRUE(NullDiePtr != nullptr); + if 
(NullDiePtr) { + EXPECT_EQ(NullDiePtr->getTag(), DW_TAG_null); + EXPECT_TRUE(NullDiePtr->getSibling() == nullptr); + EXPECT_TRUE(NullDiePtr->getFirstChild() == nullptr); + } + + auto IntDiePtr = SubprogramDiePtr->getSibling(); + EXPECT_TRUE(IntDiePtr != nullptr); + EXPECT_EQ(IntDiePtr->getTag(), DW_TAG_base_type); + + NullDiePtr = IntDiePtr->getSibling(); + EXPECT_TRUE(NullDiePtr != nullptr); + if (NullDiePtr) { + EXPECT_EQ(NullDiePtr->getTag(), DW_TAG_null); + EXPECT_TRUE(NullDiePtr->getSibling() == nullptr); + EXPECT_TRUE(NullDiePtr->getFirstChild() == nullptr); + } +} + +TEST(DWARFDebugInfo, TestDWARF32Version2Addr4Children) { + // Test that we can decode DIE children for DWARF32, version 2, with 4 byte + // addresses. + typedef uint32_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestChildren<2, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version2Addr8Children) { + // Test that we can decode DIE children for DWARF32, version 2, with 8 byte + // addresses. + typedef uint64_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestChildren<2, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version3Addr4Children) { + // Test that we can decode DIE children for DWARF32, version 3, with 4 byte + // addresses. + typedef uint32_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestChildren<3, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version3Addr8Children) { + // Test that we can decode DIE children for DWARF32, version 3, with 8 byte + // addresses. + typedef uint64_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. 
+ typedef AddrType RefAddrType; + TestChildren<3, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version4Addr4Children) { + // Test that we can decode DIE children for DWARF32, version 4, with 4 byte + // addresses. + typedef uint32_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestChildren<4, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Children) { + // Test that we can decode DIE children for DWARF32, version 4, with 8 byte + // addresses. + typedef uint64_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestChildren<4, AddrType, RefAddrType>(); +} + +template +void TestAllForms() { + // Test that we can decode all DW_FORM values correctly. + + const uint8_t AddrSize = sizeof(AddrType); + const bool LittleEndian = HostIsLittleEndian(); + const AddrType AddrValue = (AddrType)0x0123456789abcdefULL; + const uint8_t BlockData[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}; + const uint32_t BlockSize = sizeof(BlockData); + const RefAddrType RefAddr = 0x12345678; + const uint8_t Data1 = 0x01U; + const uint16_t Data2 = 0x2345U; + const uint32_t Data4 = 0x6789abcdU; + const uint64_t Data8 = 0x0011223344556677ULL; + const uint64_t Data8_2 = 0xAABBCCDDEEFF0011ULL; + const int64_t SData = INT64_MIN; + const uint64_t UData[] = {UINT64_MAX - 1, UINT64_MAX - 2, UINT64_MAX - 3, + UINT64_MAX - 4, UINT64_MAX - 5, UINT64_MAX - 6, + UINT64_MAX - 7, UINT64_MAX - 8, UINT64_MAX - 9}; +#define UDATA_1 18446744073709551614ULL + const uint32_t Dwarf32Values[] = {1, 2, 3, 4, 5, 6, 7, 8}; + const char *StringValue = "Hello"; + const char *StrpValue = "World"; + DWARFGenerator Dwarf; + CompileUnit &CU = Dwarf.appendCompileUnit(Version, AddrSize); + uint16_t Attr = DW_AT_lo_user; + + //---------------------------------------------------------------------- + // Test address forms + 
//---------------------------------------------------------------------- + const auto Attr_DW_FORM_addr = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_addr, DW_FORM_addr, AddrValue}); + + //---------------------------------------------------------------------- + // Test block forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_block = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_block, DW_FORM_block, BlockData, BlockSize}); + + const auto Attr_DW_FORM_block1 = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_block1, DW_FORM_block1, BlockData, BlockSize}); + + const auto Attr_DW_FORM_block2 = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_block2, DW_FORM_block2, BlockData, BlockSize}); + + const auto Attr_DW_FORM_block4 = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_block4, DW_FORM_block4, BlockData, BlockSize}); + + const auto Attr_DW_FORM_exprloc = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_exprloc, DW_FORM_exprloc, BlockData, BlockSize}); + + //---------------------------------------------------------------------- + // Test data forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_data1 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_data1, DW_FORM_data1, Data1}); + + const auto Attr_DW_FORM_data2 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_data2, DW_FORM_data2, Data2}); + + const auto Attr_DW_FORM_data4 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_data4, DW_FORM_data4, Data4}); + + const auto Attr_DW_FORM_data8 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_data8, DW_FORM_data8, Data8}); + + // const auto Attr_DW_FORM_data16 = static_cast(Attr++); + // CU.Die.appendAttribute({Attr_DW_FORM_data16, DW_FORM_data16}); + + //---------------------------------------------------------------------- + 
// Test string forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_string = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_string, DW_FORM_string, StringValue}); + + const auto Attr_DW_FORM_strp = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_strp, DW_FORM_strp, StrpValue}); + + //---------------------------------------------------------------------- + // Test reference forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_ref_addr = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref_addr, DW_FORM_ref_addr, RefAddr}); + + const auto Attr_DW_FORM_ref1 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref1, DW_FORM_ref1, Data1}); + + const auto Attr_DW_FORM_ref2 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref2, DW_FORM_ref2, Data2}); + + const auto Attr_DW_FORM_ref4 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref4, DW_FORM_ref4, Data4}); + + const auto Attr_DW_FORM_ref8 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref8, DW_FORM_ref8, Data8}); + + const auto Attr_DW_FORM_ref_sig8 = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref_sig8, DW_FORM_ref_sig8, Data8_2}); + + const auto Attr_DW_FORM_ref_udata = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_ref_udata, DW_FORM_ref_udata, UData[0]}); + + //---------------------------------------------------------------------- + // Test flag forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_flag_true = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_flag_true, DW_FORM_flag, true}); + + const auto Attr_DW_FORM_flag_false = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_flag_false, DW_FORM_flag, false}); + + const auto Attr_DW_FORM_flag_present = static_cast(Attr++); + 
CU.Die.appendAttribute({Attr_DW_FORM_flag_present, DW_FORM_flag_present}); + + //---------------------------------------------------------------------- + // Test SLEB128 based forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_sdata = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_sdata, DW_FORM_sdata, SData}); + + //---------------------------------------------------------------------- + // Test ULEB128 based forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_udata = static_cast(Attr++); + CU.Die.appendAttribute({Attr_DW_FORM_udata, DW_FORM_udata, UData[0]}); + + //---------------------------------------------------------------------- + // Test DWARF32/DWARF64 forms + //---------------------------------------------------------------------- + const auto Attr_DW_FORM_GNU_ref_alt = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_GNU_ref_alt, DW_FORM_GNU_ref_alt, Dwarf32Values[0]}); + + const auto Attr_DW_FORM_sec_offset = static_cast(Attr++); + CU.Die.appendAttribute( + {Attr_DW_FORM_sec_offset, DW_FORM_sec_offset, Dwarf32Values[1]}); + + //---------------------------------------------------------------------- + // Add an address at the end to make sure we can decode this value + //---------------------------------------------------------------------- + const auto Attr_Last = static_cast(Attr++); + CU.Die.appendAttribute({Attr_Last, DW_FORM_addr, AddrValue}); + + //---------------------------------------------------------------------- + // Generate the DWARF + //---------------------------------------------------------------------- + DWARFSections DwarfSections; + Dwarf.generate(DwarfSections); + + // DwarfSections.dump(llvm::outs()); + + DWARFContextInMemory dwarfContext( + LittleEndian, AddrSize, DwarfSections.getDebugAbbrevData(), + DwarfSections.getDebugInfoData(), DwarfSections.getDebugStrData()); + uint32_t NumCUs = 
dwarfContext.getNumCompileUnits(); + EXPECT_EQ(NumCUs, 1u); + DWARFCompileUnit *U = dwarfContext.getCompileUnitAtIndex(0); + auto DiePtr = U->getUnitDIE(false); + EXPECT_TRUE(DiePtr != nullptr); + + //---------------------------------------------------------------------- + // Test address forms + //---------------------------------------------------------------------- + EXPECT_EQ(DiePtr->getAttributeValueAsAddress(U, Attr_DW_FORM_addr, 0), + AddrValue); + + //---------------------------------------------------------------------- + // Test block forms + //---------------------------------------------------------------------- + DWARFFormValue FormValue; + ArrayRef ExtractedBlockData; + Optional> BlockDataOpt; + + EXPECT_TRUE(DiePtr->getAttributeValue(U, Attr_DW_FORM_block, FormValue)); + BlockDataOpt = FormValue.getAsBlock(); + EXPECT_TRUE(BlockDataOpt.hasValue()); + ExtractedBlockData = BlockDataOpt.getValue(); + EXPECT_EQ(ExtractedBlockData.size(), BlockSize); + EXPECT_TRUE(memcmp(ExtractedBlockData.data(), BlockData, BlockSize) == 0); + + EXPECT_TRUE(DiePtr->getAttributeValue(U, Attr_DW_FORM_block1, FormValue)); + BlockDataOpt = FormValue.getAsBlock(); + EXPECT_TRUE(BlockDataOpt.hasValue()); + ExtractedBlockData = BlockDataOpt.getValue(); + EXPECT_EQ(ExtractedBlockData.size(), BlockSize); + EXPECT_TRUE(memcmp(ExtractedBlockData.data(), BlockData, BlockSize) == 0); + + EXPECT_TRUE(DiePtr->getAttributeValue(U, Attr_DW_FORM_block2, FormValue)); + BlockDataOpt = FormValue.getAsBlock(); + EXPECT_TRUE(BlockDataOpt.hasValue()); + ExtractedBlockData = BlockDataOpt.getValue(); + EXPECT_EQ(ExtractedBlockData.size(), BlockSize); + EXPECT_TRUE(memcmp(ExtractedBlockData.data(), BlockData, BlockSize) == 0); + + EXPECT_TRUE(DiePtr->getAttributeValue(U, Attr_DW_FORM_block4, FormValue)); + BlockDataOpt = FormValue.getAsBlock(); + EXPECT_TRUE(BlockDataOpt.hasValue()); + ExtractedBlockData = BlockDataOpt.getValue(); + EXPECT_EQ(ExtractedBlockData.size(), BlockSize); + 
EXPECT_TRUE(memcmp(ExtractedBlockData.data(), BlockData, BlockSize) == 0); + + EXPECT_TRUE(DiePtr->getAttributeValue(U, Attr_DW_FORM_exprloc, FormValue)); + BlockDataOpt = FormValue.getAsBlock(); + EXPECT_TRUE(BlockDataOpt.hasValue()); + ExtractedBlockData = BlockDataOpt.getValue(); + EXPECT_EQ(ExtractedBlockData.size(), BlockSize); + EXPECT_TRUE(memcmp(ExtractedBlockData.data(), BlockData, BlockSize) == 0); + + //---------------------------------------------------------------------- + // Test data forms + //---------------------------------------------------------------------- + EXPECT_EQ( + DiePtr->getAttributeValueAsUnsignedConstant(U, Attr_DW_FORM_data1, 0), + Data1); + EXPECT_EQ( + DiePtr->getAttributeValueAsUnsignedConstant(U, Attr_DW_FORM_data2, 0), + Data2); + EXPECT_EQ( + DiePtr->getAttributeValueAsUnsignedConstant(U, Attr_DW_FORM_data4, 0), + Data4); + EXPECT_EQ( + DiePtr->getAttributeValueAsUnsignedConstant(U, Attr_DW_FORM_data8, 0), + Data8); + + //---------------------------------------------------------------------- + // Test string forms + //---------------------------------------------------------------------- + const char *ExtractedStringValue = + DiePtr->getAttributeValueAsString(U, Attr_DW_FORM_string, nullptr); + EXPECT_TRUE(ExtractedStringValue != nullptr); + EXPECT_TRUE(strcmp(StringValue, ExtractedStringValue) == 0); + + const char *ExtractedStrpValue = + DiePtr->getAttributeValueAsString(U, Attr_DW_FORM_strp, nullptr); + EXPECT_TRUE(ExtractedStrpValue != nullptr); + EXPECT_TRUE(strcmp(StrpValue, ExtractedStrpValue) == 0); + + //---------------------------------------------------------------------- + // Test reference forms + //---------------------------------------------------------------------- + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref_addr, 0), + RefAddr); + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref1, 0), + Data1); + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref2, 
0), + Data2); + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref4, 0), + Data4); + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref8, 0), + Data8); + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref_sig8, 0), + Data8_2); + EXPECT_EQ(DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_ref_udata, 0), + UData[0]); + + //---------------------------------------------------------------------- + // Test flag forms + //---------------------------------------------------------------------- + EXPECT_EQ(DiePtr->getAttributeValueAsUnsignedConstant( + U, Attr_DW_FORM_flag_true, 0ULL), + 1ULL); + EXPECT_EQ(DiePtr->getAttributeValueAsUnsignedConstant( + U, Attr_DW_FORM_flag_false, 1ULL), + 0ULL); + EXPECT_EQ(DiePtr->getAttributeValueAsUnsignedConstant( + U, Attr_DW_FORM_flag_present, 0ULL), + 1ULL); + + // TODO: test Attr_DW_FORM_implicit_const extraction + + //---------------------------------------------------------------------- + // Test SLEB128 based forms + //---------------------------------------------------------------------- + EXPECT_EQ(DiePtr->getAttributeValueAsSignedConstant(U, Attr_DW_FORM_sdata, 0), + SData); + + //---------------------------------------------------------------------- + // Test ULEB128 based forms + //---------------------------------------------------------------------- + EXPECT_EQ( + DiePtr->getAttributeValueAsUnsignedConstant(U, Attr_DW_FORM_udata, 0), + UData[0]); + + //---------------------------------------------------------------------- + // Test DWARF32/DWARF64 forms + //---------------------------------------------------------------------- + EXPECT_EQ( + DiePtr->getAttributeValueAsReference(U, Attr_DW_FORM_GNU_ref_alt, 0), + Dwarf32Values[0]); + EXPECT_EQ( + DiePtr->getAttributeValueAsSectionOffset(U, Attr_DW_FORM_sec_offset, 0), + Dwarf32Values[1]); + + //---------------------------------------------------------------------- + // Add an address at the end to make sure we can decode 
this value + //---------------------------------------------------------------------- + EXPECT_EQ(DiePtr->getAttributeValueAsAddress(U, Attr_Last, 0), AddrValue); +} + +TEST(DWARFDebugInfo, TestDWARF32Version2Addr4AllForms) { + // Test that we can decode all forms for DWARF32, version 2, with 4 byte + // addresses. + typedef uint32_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestAllForms<2, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version2Addr8AllForms) { + // Test that we can decode all forms for DWARF32, version 2, with 4 byte + // addresses. + typedef uint64_t AddrType; + // DW_FORM_ref_addr are the same as the address type in DWARF32 version 2. + typedef AddrType RefAddrType; + TestAllForms<2, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version3Addr4AllForms) { + // Test that we can decode all forms for DWARF32, version 3, with 4 byte + // addresses. + typedef uint32_t AddrType; + // DW_FORM_ref_addr are 4 bytes in DWARF32 for version 3 and later. + typedef uint32_t RefAddrType; + TestAllForms<3, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version3Addr8AllForms) { + // Test that we can decode all forms for DWARF32, version 3, with 8 byte + // addresses. + typedef uint64_t AddrType; + // DW_FORM_ref_addr are 4 bytes in DWARF32 for version 3 and later + typedef uint32_t RefAddrType; + TestAllForms<3, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version4Addr4AllForms) { + // Test that we can decode all forms for DWARF32, version 4, with 4 byte + // addresses. + typedef uint32_t AddrType; + // DW_FORM_ref_addr are 4 bytes in DWARF32 for version 3 and later + typedef uint32_t RefAddrType; + TestAllForms<4, AddrType, RefAddrType>(); +} + +TEST(DWARFDebugInfo, TestDWARF32Version4Addr8AllForms) { + // Test that we can decode all forms for DWARF32, version 4, with 8 byte + // addresses. 
+ typedef uint64_t AddrType; + // DW_FORM_ref_addr are 4 bytes in DWARF32 for version 3 and later + typedef uint32_t RefAddrType; + TestAllForms<4, AddrType, RefAddrType>(); +} + + +} // end anonymous namespace Index: unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp =================================================================== --- unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp +++ unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp @@ -20,19 +20,6 @@ namespace { -TEST(DWARFFormValue, FixedFormSizes) { - // Size of DW_FORM_addr and DW_FORM_ref_addr are equal in DWARF2, - // DW_FORM_ref_addr is always 4 bytes in DWARF32 starting from DWARF3. - ArrayRef sizes = DWARFFormValue::getFixedFormSizes(4, 2); - EXPECT_EQ(sizes[DW_FORM_addr], sizes[DW_FORM_ref_addr]); - sizes = DWARFFormValue::getFixedFormSizes(8, 2); - EXPECT_EQ(sizes[DW_FORM_addr], sizes[DW_FORM_ref_addr]); - sizes = DWARFFormValue::getFixedFormSizes(8, 3); - EXPECT_EQ(4, sizes[DW_FORM_ref_addr]); - // Check that we don't have fixed form sizes for weird address sizes. - EXPECT_EQ(0U, DWARFFormValue::getFixedFormSizes(16, 2).size()); -} - bool isFormClass(dwarf::Form Form, DWARFFormValue::FormClass FC) { return DWARFFormValue(Form).isFormClass(FC); } Index: unittests/DebugInfo/DWARF/DWARFGenerator.h =================================================================== --- unittests/DebugInfo/DWARF/DWARFGenerator.h +++ unittests/DebugInfo/DWARF/DWARFGenerator.h @@ -0,0 +1,321 @@ +//===- llvm/unittest/DebugInfo/DWARFGenerator.h ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include + +namespace dwarf_gen { + +//------------------------------------------------------------------------------ +/// Classes for generating DWARF binary data that can be used in unit tests. +/// +/// All classes contained in this file allow quick generation of DWARF debug +/// information. The debug information generated includes .debug_abbrev, +/// .debug_info and .debug_str sections. +//------------------------------------------------------------------------------ +struct CompileUnit; +struct DIE; +class DWARFGenerator; +class DWARFSections; +//------------------------------------------------------------------------------ +/// String table class. +/// +/// This class will generate a string table with unique C strings. Calling +/// StringTable::addString(const char *) will return a uint32_t offset into the +/// string table that can be used as a DW_FORM_strp form value. After all +/// strings have been added to the table, call StringTable::getData() and use +/// that as the contents of the .debug.str section. +//------------------------------------------------------------------------------ +class StringTable { + std::string Data; + +public: + // Initialize the string table with a leading NULL byte + StringTable() : Data(1, '\0') {} + + void clear() { Data = std::string(1, '\0'); } + + // Add a string to the string table and returns its 32 bit offset. + uint32_t addString(const char *CStr) { + if (CStr && CStr[0]) { + size_t Pos = Data.find(CStr, 0); + if (Pos != std::string::npos) + return (uint32_t)Pos; + uint32_t StringOffset = Data.size(); + // Append the string including the null terminator. 
+ Data.append(CStr, strlen(CStr) + 1); + return StringOffset; + } + return 0; + } + + llvm::StringRef getData() const { return llvm::StringRef(Data); } +}; + +//------------------------------------------------------------------------------ +/// DWARF attribute value encoding values used in AttributeValue below. +//------------------------------------------------------------------------------ +enum EncodingType { Invalid = 0, Block, String, Unsigned, Signed, Die }; + +//------------------------------------------------------------------------------ +/// A class that represents the value that will be used to generate the binary +/// DWARF .debug_info data for a DW_FORM. +//------------------------------------------------------------------------------ +struct AttributeValue { + EncodingType Encoding; + struct BlockType { + const void *Data; + size_t Size; + }; + union { + bool Bool; + uint64_t UVal; + uint64_t SVal; + const char *CStr; + BlockType Block; + DIE *Die; + }; + AttributeValue() : Encoding(EncodingType::Invalid) {} + AttributeValue(bool U) : Encoding(Unsigned) { UVal = U; } + AttributeValue(uint8_t U) : Encoding(Unsigned) { UVal = U; } + AttributeValue(uint16_t U) : Encoding(Unsigned) { UVal = U; } + AttributeValue(uint32_t U) : Encoding(Unsigned) { UVal = U; } + AttributeValue(uint64_t U) : Encoding(Unsigned) { UVal = U; } + AttributeValue(int8_t S) : Encoding(Signed) { SVal = S; } + AttributeValue(int16_t S) : Encoding(Signed) { SVal = S; } + AttributeValue(int32_t S) : Encoding(Signed) { SVal = S; } + AttributeValue(int64_t S) : Encoding(Signed) { SVal = S; } + AttributeValue(const char *S) : Encoding(String) { CStr = S; } + AttributeValue(DIE *D) : Encoding(EncodingType::Die) { Die = D; } + AttributeValue(const void *P, size_t S) : Encoding(EncodingType::Block) { + Block.Data = P; + Block.Size = S; + } +}; + +//------------------------------------------------------------------------------ +/// DWARF debug information is streamed into "Bytes" using the 
stream \a OS. +/// +/// This class is used to put data into a DWARF section. +//------------------------------------------------------------------------------ +struct Streamer { + llvm::SmallString<64> &Bytes; + llvm::raw_svector_ostream OS; + Streamer(llvm::SmallString<64> &B) : Bytes(B), OS(Bytes) {} + //---------------------------------------------------------------------------- + /// Write the binary bytes for \a value into the stream using the native + /// endian format. + //---------------------------------------------------------------------------- + template void encodeBinaryData(T value) { + OS.write((const char *)&value, sizeof(value)); + } + //---------------------------------------------------------------------------- + /// Encode \a Size raw bytes pointed to by \a Ptr into Bytes. + //---------------------------------------------------------------------------- + void write(const void *Ptr, size_t Size); + //---------------------------------------------------------------------------- + /// Encode an unsigned LEB128 number into Bytes. + //---------------------------------------------------------------------------- + void encodeULEB128(uint64_t U); + //---------------------------------------------------------------------------- + /// Encode an signed LEB128 number into Bytes. + //---------------------------------------------------------------------------- + void encodeSLEB128(int64_t S); +}; + +//------------------------------------------------------------------------------ +/// A class that represents an attribute, its form and its values for an +/// attribute in a DIE. +/// +/// This class makes it easy to add attributes to a DIE and allows us to +/// easily generate the necessary DWARF at a later time. 
+//------------------------------------------------------------------------------ +struct Attribute { + uint16_t Attr; + uint16_t Form; + AttributeValue Value; + Attribute(uint16_t A, uint16_t F) : Attr(A), Form(F), Value() {} + Attribute(uint16_t A, uint16_t F, bool U) : Attr(A), Form(F), Value(U) {} + Attribute(uint16_t A, uint16_t F, uint8_t U) : Attr(A), Form(F), Value(U) {} + Attribute(uint16_t A, uint16_t F, uint16_t U) : Attr(A), Form(F), Value(U) {} + Attribute(uint16_t A, uint16_t F, uint32_t U) : Attr(A), Form(F), Value(U) {} + Attribute(uint16_t A, uint16_t F, uint64_t U) : Attr(A), Form(F), Value(U) {} + Attribute(uint16_t A, uint16_t F, int8_t S) : Attr(A), Form(F), Value(S) {} + Attribute(uint16_t A, uint16_t F, int16_t S) : Attr(A), Form(F), Value(S) {} + Attribute(uint16_t A, uint16_t F, int32_t S) : Attr(A), Form(F), Value(S) {} + Attribute(uint16_t A, uint16_t F, int64_t S) : Attr(A), Form(F), Value(S) {} + Attribute(uint16_t A, uint16_t F, const char *S) + : Attr(A), Form(F), Value(S) {} + Attribute(uint16_t A, uint16_t F, const void *P, size_t S) + : Attr(A), Form(F), Value(P, S) {} + Attribute(uint16_t A, uint16_t F, DIE *D) : Attr(A), Form(F), Value(D) {} + size_t getDebugInfoByteSize(CompileUnit &CU) const; + bool generate(CompileUnit &CU, Streamer &Abbrev, Streamer &Info, + StringTable &Strings) const; +}; + +//------------------------------------------------------------------------------ +/// A class that represents a DWARF DIE (Debug Information Entry). +/// +/// This class makes it easy to create a new DIE, add attributes to it and then +/// add it as a child of another DIE. This intermediate class is used to +/// generate DWARF debug info so it contains all the information needed to +/// create an abbreviation, and also emit the data into the DWARF byte streams. 
+//------------------------------------------------------------------------------ +struct DIE { + llvm::dwarf::Tag Tag; + uint64_t Offset; // This gets calculated during preGenerate() + std::vector Attrs; + std::list Children; + DIE *Parent; + void setParent(DIE *P) { Parent = P; } + DIE(llvm::dwarf::Tag T) + : Tag(T), Offset(-1ULL), Attrs(), Children(), Parent(0) {} + void appendAttribute(const Attribute &Attr) { Attrs.push_back(Attr); } + //---------------------------------------------------------------------------- + /// To add DIE as a child of this DIE, you must use this method. The returned + /// DIE can be used to add attributes, and also to add children DIE. + //---------------------------------------------------------------------------- + DIE &appendChild(llvm::dwarf::Tag T) { + Children.push_back(DIE(T)); + DIE &Die = Children.back(); + Die.setParent(this); + return Die; + } + //---------------------------------------------------------------------------- + /// In order to generate the .debug_info for a compile unit DIE, we must know + /// the length of the compile unit. This function will calculate the number of + /// bytes that will be encoded into the .debug_info section. It will calculate + /// all of the data for this DIE, its children and and NULL tags that + /// terminate sibling chains. It also sets the Offset member variable to be + /// correct so that we can calculate DIE references correctly. + //---------------------------------------------------------------------------- + void preGenerate(CompileUnit &CU, uint64_t &Offset); + bool generate(DWARFGenerator &Dwarf, CompileUnit &CU, Streamer &Abbrev, + Streamer &Info, StringTable &Strings) const; +}; + +//------------------------------------------------------------------------------ +/// A class that represents a DWARF compile unit. +/// +/// Instances of these classes are created by instances of the DWARFGenerator +/// class. 
All information required to generate a DWARF compile unit is +/// contained inside this class. +//------------------------------------------------------------------------------ +struct CompileUnit { + uint64_t Offset; // Gets calculated in preGenerate() + uint32_t Length; // Gets calculated in preGenerate() + uint16_t Version; + uint8_t AddrSize; + DIE Die; + CompileUnit(uint16_t V, uint8_t A) + : Offset(-1ULL), Length(-1U), Version(V), AddrSize(A), + Die(llvm::dwarf::DW_TAG_compile_unit) {} + + //---------------------------------------------------------------------------- + /// Called before we generate the DWARF so we can figure out the Offset and + /// Length of the compile unit and its DIEs before we stream it out. + //---------------------------------------------------------------------------- + void preGenerate(uint64_t &Offset); + + bool generate(DWARFGenerator &Dwarf, Streamer &Abbrev, Streamer &Info, + StringTable &Strings); +}; + +//------------------------------------------------------------------------------ +/// A class that represents a DWARF that needs to be generated. +/// +/// Clients will need to call DWARFGenerator::appendCompileUnit() for each +/// compile unit they need in the DWARF. Each returned CompileUnit contains a +/// compile unit DIE that can have attributes appended to it and child DIEs +/// added. Once all compile units have been added and all DIEs have been +/// modified and created, a call to DWARFGenerator::generate() will create +/// all needed DWARF section data. 
+//------------------------------------------------------------------------------ +class DWARFGenerator { + typedef std::pair UInt16Pair; + typedef std::vector AttrFormsType; + std::list CompUnits; + uint32_t NextAbbrevCode; + std::map Abbreviations; + +protected: + friend struct DIE; + uint32_t getAbbrevation(const DIE &Die); + +public: + DWARFGenerator() : NextAbbrevCode(1) {} + + CompileUnit &appendCompileUnit(uint16_t Version, uint8_t AddrSize) { + CompUnits.push_back(CompileUnit(Version, AddrSize)); + return CompUnits.back(); + } + + bool generate(DWARFSections &DwarfSections); +}; + +class DWARFSections { + llvm::SmallString<64> DebugAbbrevBytes; + llvm::SmallString<64> DebugInfoBytes; + StringTable Strings; + +protected: + friend class DWARFGenerator; + llvm::SmallString<64> &getDebugAbbrevBytes() { return DebugAbbrevBytes; } + llvm::SmallString<64> &getDebugInfoBytes() { return DebugInfoBytes; } + StringTable &getStringTable() { return Strings; } + + llvm::ArrayRef getDebugAbbrevArray() const { + if (DebugAbbrevBytes.empty()) + return llvm::ArrayRef(); + return llvm::ArrayRef((const uint8_t *)DebugAbbrevBytes.data(), + DebugAbbrevBytes.size()); + } + llvm::ArrayRef getDebugInfoArray() const { + if (DebugInfoBytes.empty()) + return llvm::ArrayRef(); + return llvm::ArrayRef((const uint8_t *)DebugInfoBytes.data(), + DebugInfoBytes.size()); + } + llvm::ArrayRef getDebugStrArray() const { + llvm::StringRef Data = Strings.getData(); + if (Data.empty()) + return llvm::ArrayRef(); + return llvm::ArrayRef((const uint8_t *)Data.data(), Data.size()); + } + +public: + DWARFSections() {} + + void clear() { + DebugAbbrevBytes.clear(); + DebugInfoBytes.clear(); + Strings.clear(); + } + + // Dump all of the DWARF sections as hex bytes with offsets. 
+ void dump(llvm::raw_ostream &OS); + + llvm::StringRef getDebugAbbrevData() const { + return llvm::StringRef(DebugAbbrevBytes.data(), DebugAbbrevBytes.size()); + } + llvm::StringRef getDebugInfoData() const { + return llvm::StringRef(DebugInfoBytes.data(), DebugInfoBytes.size()); + } + llvm::StringRef getDebugStrData() const { return Strings.getData(); } +}; + +} // end dwarf_gen namespace Index: unittests/DebugInfo/DWARF/DWARFGenerator.cpp =================================================================== --- unittests/DebugInfo/DWARF/DWARFGenerator.cpp +++ unittests/DebugInfo/DWARF/DWARFGenerator.cpp @@ -0,0 +1,569 @@ +//===- llvm/unittest/DebugInfo/DWARFFormValueTest.cpp ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFGenerator.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/LEB128.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace dwarf; +using namespace dwarf_gen; + +namespace { +uint64_t calculateReferenceValue(CompileUnit &CU, dwarf::Form Form, DIE *Die) { + if (Die == nullptr) + return -1ULL; + if (Die->Offset == -1ULL) + return -1ULL; + if (CU.Offset == -1ULL) + return -1ULL; + + switch (Form) { + case DW_FORM_ref_addr: + return Die->Offset; + + case DW_FORM_ref1: { + uint64_t CUOffset = Die->Offset - CU.Offset; + assert(CUOffset <= UINT8_MAX); + return CUOffset; + } break; + case DW_FORM_ref2: { + uint64_t CUOffset = Die->Offset - CU.Offset; + assert(CUOffset <= UINT16_MAX); + return CUOffset; + } break; + + case DW_FORM_ref4: { + uint64_t CUOffset = Die->Offset - CU.Offset; + assert(CUOffset <= UINT32_MAX); + return CUOffset; + } break; + + case DW_FORM_ref8: { + uint64_t CUOffset = Die->Offset - CU.Offset; + return CUOffset; + } 
break; + + case DW_FORM_ref_udata: { + uint64_t CUOffset = Die->Offset - CU.Offset; + return CUOffset; + } break; + + default: + assert(!"Unhandled reference type"); + break; + } + return -1ULL; +} +} // end anonymous namespace + +//------------------------------------------------------------------------------ +// dwarf_gen::Streamer +//------------------------------------------------------------------------------ +void Streamer::write(const void *Ptr, size_t Size) { + OS.write((const char *)Ptr, Size); +} +void Streamer::encodeULEB128(uint64_t U) { llvm::encodeULEB128(U, OS); } +void Streamer::encodeSLEB128(int64_t S) { llvm::encodeSLEB128(S, OS); } + +//------------------------------------------------------------------------------ +// dwarf_gen::CompileUnit +//------------------------------------------------------------------------------ +void CompileUnit::preGenerate(uint64_t &DIEOffset) { + // Set our offset prior to calling getDebugInfoByteSize() so the DIEs can + // calculate their offsets correctly. + Offset = DIEOffset; + // Set the correct DIE offset for our first DIE by adding our compile unit + // header byte size + // TODO: DWARF64 fixes needed here as header will be more that 11 bytes + DIEOffset += 11; + // Let all DIEs figure out what their offsets are so we can calcuate DIE + // references correctly. + Die.preGenerate(*this, DIEOffset); + // Calculate the length of the compile unit based on where DIEOffset is after + // calling DIE Die.preGenerate(...) + Length = DIEOffset - Offset - 4; +} +bool CompileUnit::generate(DWARFGenerator &Dwarf, Streamer &Abbrev, + Streamer &Info, StringTable &Strings) { + // Encode compile unit header. 
+ if (Length == -1U) + return false; + Info.encodeBinaryData(Length); // Compile unit length + Info.encodeBinaryData(Version); // DWARF version + Info.encodeBinaryData(0); // .debug_abbrev offset + Info.encodeBinaryData(AddrSize); // Address byte size + return Die.generate(Dwarf, *this, Abbrev, Info, Strings); +} + +//------------------------------------------------------------------------------ +// dwarf_gen::DWARFGenerator +//------------------------------------------------------------------------------ +uint32_t DWARFGenerator::getAbbrevation(const DIE &Die) { + AttrFormsType AttrForms; + AttrForms.push_back(std::make_pair(Die.Tag, Die.Children.empty() ? 0 : 1)); + for (const auto &Attr : Die.Attrs) + AttrForms.push_back(std::make_pair(Attr.Attr, Attr.Form)); + auto Iter = Abbreviations.find(AttrForms); + if (Iter != Abbreviations.end()) + return Iter->second; + const uint32_t AbbrevCode = NextAbbrevCode++; + Abbreviations[AttrForms] = AbbrevCode; + return AbbrevCode; +} +bool DWARFGenerator::generate(DWARFSections &DwarfSections) { + DwarfSections.clear(); + + // Run through and let all DIEs figure out their offsets and let the compile + // units figure out their lengths. + uint64_t Offset = 0; + for (auto &CU : CompUnits) + CU.preGenerate(Offset); + + Streamer Abbrev(DwarfSections.getDebugAbbrevBytes()); + Streamer Info(DwarfSections.getDebugInfoBytes()); + + // Now emit the DWARF bytes into the DWARF streams + for (auto &CU : CompUnits) { + if (!CU.generate(*this, Abbrev, Info, DwarfSections.getStringTable())) + return false; + } + + // Now emit all of the abbeviation declarations. 
+ for (const auto &Pair : Abbreviations) { + // Emit the abbreviation code + Abbrev.encodeULEB128(Pair.second); + // Now emit the Tag/Children and Attr/Form pairs + const AttrFormsType &AttrForms = Pair.first; + for (const auto AttrForm : AttrForms) { + Abbrev.encodeULEB128(AttrForm.first); + Abbrev.encodeULEB128(AttrForm.second); + } + Abbrev.encodeULEB128(0); + Abbrev.encodeULEB128(0); + } + return true; +} + +//------------------------------------------------------------------------------ +// dwarf_gen::DWARFSections +//------------------------------------------------------------------------------ +void dwarf_gen::DWARFSections::dump(raw_ostream &OS) { + + OS << ".debug_abbrev:\n" + << format_hex_bytes_with_ascii(getDebugAbbrevArray(), 0, 16, 1); + OS << "\n.debug_info:\n" + << format_hex_bytes_with_ascii(getDebugInfoArray(), 0, 16, 1); + OS << "\n.debug_str:\n" + << format_hex_bytes_with_ascii(getDebugStrArray(), 0, 16, 1); + OS << '\n'; +} + +//------------------------------------------------------------------------------ +// dwarf_gen::Attribute +//------------------------------------------------------------------------------ +size_t dwarf_gen::Attribute::getDebugInfoByteSize(CompileUnit &CU) const { + Optional FixedSize = DWARFFormValue::getFixedByteSize( + static_cast(Form), CU.Version, CU.AddrSize); + if (FixedSize.hasValue()) + return FixedSize.getValue(); + switch (Form) { + case DW_FORM_block: + case DW_FORM_exprloc: + return getULEB128Size(Value.Block.Size) + Value.Block.Size; + + case DW_FORM_block1: + return 1 + Value.Block.Size; + + case DW_FORM_block2: + return 2 + Value.Block.Size; + + case DW_FORM_block4: + return 4 + Value.Block.Size; + + case DW_FORM_string: + return Value.CStr ? 
strlen(Value.CStr) + 1 : 1; + + case DW_FORM_sdata: + return getSLEB128Size(Value.SVal); + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_indirect: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + case DW_FORM_GNU_addr_index: + case DW_FORM_GNU_str_index: + return getULEB128Size(Value.UVal); + default: + assert(!"Add support for variable length form here"); + break; + } +} + +bool dwarf_gen::Attribute::generate(CompileUnit &CU, Streamer &Abbrev, + Streamer &Info, + StringTable &Strings) const { + auto F = static_cast(Form); + uint64_t ReferenceValue = -1ULL; + switch (F) { + // Attribute form encodings. + case DW_FORM_addr: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) { + if (CU.AddrSize == 4) + Info.encodeBinaryData((uint32_t)Value.UVal); + else if (CU.AddrSize == 8) + Info.encodeBinaryData((uint64_t)Value.UVal); + else { + fprintf(stderr, "error: %s using unsupported address size %u\n", + FormEncodingString(F).data(), CU.AddrSize); + return false; + } + } else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_block2: + if (Value.Encoding != Block) { + fprintf(stderr, "error: %s must have Block as encoding\n", + FormEncodingString(F).data()); + return false; + } + Info.encodeBinaryData((uint16_t)Value.Block.Size); + Info.write(Value.Block.Data, Value.Block.Size); + break; + + case DW_FORM_block4: + if (Value.Encoding != Block) { + fprintf(stderr, "error: %s must have Block as encoding\n", + FormEncodingString(F).data()); + return false; + } + Info.encodeBinaryData((uint32_t)Value.Block.Size); + Info.write(Value.Block.Data, Value.Block.Size); + break; + + case DW_FORM_data2: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeBinaryData((uint16_t)Value.UVal); + else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return 
false; + } + break; + + case DW_FORM_strp: + if (Value.Encoding != String) { + fprintf(stderr, "error: %s must have String as encoding\n", + FormEncodingString(F).data()); + return false; + } + Info.encodeBinaryData(Strings.addString(Value.CStr)); + break; + + case DW_FORM_data4: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeBinaryData((uint32_t)Value.UVal); + else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_data8: + case DW_FORM_ref_sig8: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeBinaryData((uint64_t)Value.UVal); + else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_string: + if (Value.Encoding != String) { + fprintf(stderr, "error: %s must have String as encoding\n", + FormEncodingString(F).data()); + return false; + } + if (Value.CStr) + Info.write(Value.CStr, strlen(Value.CStr) + 1); + else + Info.encodeBinaryData((uint8_t)0); + break; + + case DW_FORM_exprloc: + case DW_FORM_block: + if (Value.Encoding != Block) { + fprintf(stderr, "error: %s must have Block as encoding\n", + FormEncodingString(F).data()); + return false; + } + Info.encodeULEB128(Value.Block.Size); + Info.write(Value.Block.Data, Value.Block.Size); + break; + case DW_FORM_block1: + if (Value.Encoding != Block) { + fprintf(stderr, "error: DW_FORM_block2 must have Block as encoding\n"); + return false; + } + Info.encodeBinaryData((uint8_t)Value.Block.Size); + Info.write(Value.Block.Data, Value.Block.Size); + break; + + case DW_FORM_ref1: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + ReferenceValue = Value.UVal; + else if (Value.Encoding == EncodingType::Die) + ReferenceValue = calculateReferenceValue(CU, F, Value.Die); + else { + fprintf(stderr, "error: %s has unsupported encoding\n", + FormEncodingString(F).data()); + 
return false; + } + if (ReferenceValue != -1ULL) { + Info.encodeBinaryData((uint8_t)ReferenceValue); + } else { + fprintf(stderr, + "error: %s wasn't able to calculate the CU relative reference\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_ref2: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + ReferenceValue = Value.UVal; + else if (Value.Encoding == EncodingType::Die) + ReferenceValue = calculateReferenceValue(CU, F, Value.Die); + else { + fprintf(stderr, "error: %s has unsupported encoding\n", + FormEncodingString(F).data()); + return false; + } + if (ReferenceValue != -1ULL) { + Info.encodeBinaryData((uint16_t)ReferenceValue); + } else { + fprintf(stderr, + "error: %s wasn't able to calculate the CU relative reference\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_ref4: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + ReferenceValue = Value.UVal; + else if (Value.Encoding == EncodingType::Die) + ReferenceValue = calculateReferenceValue(CU, F, Value.Die); + else { + fprintf(stderr, "error: %s has unsupported encoding\n", + FormEncodingString(F).data()); + return false; + } + if (ReferenceValue != -1ULL) { + Info.encodeBinaryData((uint32_t)ReferenceValue); + } else { + fprintf(stderr, + "error: %s wasn't able to calculate the CU relative reference\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_ref8: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + ReferenceValue = Value.UVal; + else if (Value.Encoding == EncodingType::Die) + ReferenceValue = calculateReferenceValue(CU, F, Value.Die); + else { + fprintf(stderr, "error: %s has unsupported encoding\n", + FormEncodingString(F).data()); + return false; + } + if (ReferenceValue != -1ULL) { + Info.encodeBinaryData(ReferenceValue); + } else { + fprintf(stderr, + "error: %s wasn't able to calculate the CU relative reference\n", + FormEncodingString(F).data()); + return 
false; + } + break; + + case DW_FORM_ref_udata: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + ReferenceValue = Value.UVal; + else if (Value.Encoding == EncodingType::Die) + ReferenceValue = calculateReferenceValue(CU, F, Value.Die); + else { + fprintf(stderr, "error: %s has unsupported encoding\n", + FormEncodingString(F).data()); + return false; + } + if (ReferenceValue != -1ULL) { + Info.encodeULEB128(ReferenceValue); + } else { + fprintf(stderr, + "error: %s wasn't able to calculate the CU relative reference\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_data1: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeBinaryData((uint8_t)Value.UVal); + else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + case DW_FORM_flag: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) { + assert(Value.UVal == 0 || Value.UVal == 1); + Info.encodeBinaryData((uint8_t)Value.UVal); + } else { + fprintf(stderr, "error: %s must have Unsigned or Signed as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_sdata: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeSLEB128(Value.SVal); + else { + fprintf(stderr, "error: %s must have Signed as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + case DW_FORM_GNU_addr_index: + case DW_FORM_GNU_str_index: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeULEB128(Value.SVal); + else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + case DW_FORM_line_strp: + case DW_FORM_sec_offset: + case DW_FORM_strp_sup: + case 
DW_FORM_ref_sup: + // TODO: support DWARF64 + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + Info.encodeBinaryData((uint32_t)Value.UVal); + else { + fprintf(stderr, "error: %s must have Unsigned as encoding\n", + FormEncodingString(F).data()); + return false; + } + break; + + case DW_FORM_ref_addr: + if (Value.Encoding == Unsigned || Value.Encoding == Signed) + ReferenceValue = Value.UVal; + else if (Value.Encoding == EncodingType::Die) + ReferenceValue = calculateReferenceValue(CU, F, Value.Die); + else { + fprintf(stderr, "error: %s has unsupported encoding\n", + FormEncodingString(F).data()); + return false; + } + if (ReferenceValue != -1ULL) { + if (CU.Version == 2) { + if (CU.AddrSize == 4) + Info.encodeBinaryData((uint32_t)ReferenceValue); + else if (CU.AddrSize == 8) + Info.encodeBinaryData((uint64_t)ReferenceValue); + else { + fprintf(stderr, "error: %s using unsupported address size %u\n", + FormEncodingString(F).data(), CU.AddrSize); + return false; + } + } else { + Info.encodeBinaryData((uint32_t)ReferenceValue); + } + } else { + fprintf(stderr, "error: %s wasn't able to calculate the reference\n", + FormEncodingString(F).data()); + return false; + } + break; + case DW_FORM_flag_present: + case DW_FORM_implicit_const: + // No bytes need to be written into .debug_info for these attributes. 
+ break; + + case DW_FORM_data16: + case DW_FORM_indirect: + case DW_FORM_lo_user: + fprintf(stderr, "error: %s is not supported\n", + FormEncodingString(F).data()); + return false; // Not supported + } + return true; +} + +//------------------------------------------------------------------------------ +// dwarf_gen::DIE +//------------------------------------------------------------------------------ +void DIE::preGenerate(CompileUnit &CU, uint64_t &DIEOffset) { + Offset = DIEOffset; + DIEOffset += getULEB128Size(Tag); + for (const auto &Attr : Attrs) { + DIEOffset += Attr.getDebugInfoByteSize(CU); + } + if (!Children.empty()) { + for (auto &Child : Children) + Child.preGenerate(CU, DIEOffset); + DIEOffset += 1; // zero abbrevation code to terminate child chain + } +} + +bool DIE::generate(DWARFGenerator &Dwarf, CompileUnit &CU, Streamer &Abbrev, + Streamer &Info, StringTable &Strings) const { + const uint32_t AbbrevCode = Dwarf.getAbbrevation(*this); + Info.encodeULEB128(AbbrevCode); + for (const auto &Attr : Attrs) { + if (!Attr.generate(CU, Abbrev, Info, Strings)) + return false; + } + if (!Children.empty()) { + for (const auto &Child : Children) { + if (!Child.generate(Dwarf, CU, Abbrev, Info, Strings)) + return false; + } + // Emit a zero abbreviation code to terminate the sibling chain + Info.encodeBinaryData((uint8_t)0); + } + return true; +}