Index: llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -348,6 +348,7 @@ } }; +public: /// A single entry in the Name Table (Dwarf 5 sect. 6.1.1.4.6) of the Name /// Index. struct NameTableEntry { @@ -355,7 +356,6 @@ uint32_t EntryOffset; ///< Offset of the first Entry in the list. }; -public: /// Represents a single accelerator table within the Dwarf 5 .debug_names /// section. class NameIndex { @@ -373,16 +373,6 @@ uint32_t EntryOffsetsBase; uint32_t EntriesBase; - /// Reads an entry in the Hash Array for the given Index. The input Index - /// is 1-based. - uint32_t getHashArrayEntry(uint32_t Index) const; - - /// Reads an entry in the Name Table for the given Index. The Name Table - /// consists of two arrays -- String Offsets and Entry Offsets. The returned - /// offsets are relative to the starts of respective sections. Input Index - /// is 1-based. - NameTableEntry getNameTableEntry(uint32_t Index) const; - Expected<Entry> getEntry(uint32_t *Offset) const; void dumpCUs(ScopedPrinter &W) const; @@ -423,6 +413,16 @@ uint32_t getBucketArrayEntry(uint32_t Bucket) const; uint32_t getBucketCount() const { return Hdr.BucketCount; } + /// Reads an entry in the Hash Array for the given Index. The input Index + /// is 1-based. + uint32_t getHashArrayEntry(uint32_t Index) const; + + /// Reads an entry in the Name Table for the given Index. The Name Table + /// consists of two arrays -- String Offsets and Entry Offsets. The returned + /// offsets are relative to the starts of respective sections. Input Index + /// is 1-based. + NameTableEntry getNameTableEntry(uint32_t Index) const; + uint32_t getNameCount() const { return Hdr.NameCount; } llvm::Error extract(); Index: llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFVerifier.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ llvm/trunk/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_DWARF_DWARFVERIFIER_H #include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" @@ -26,7 +27,6 @@ class DWARFUnit; class DWARFDataExtractor; class DWARFDebugAbbrev; -class DWARFDebugNames; class DataExtractor; struct DWARFSection; @@ -234,6 +234,8 @@ const char *SectionName); unsigned verifyDebugNamesCULists(const DWARFDebugNames &AccelTable); + unsigned verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI, + const DataExtractor &StrData); /// Verify that the DWARF v5 accelerator table is valid. /// @@ -242,6 +244,8 @@ /// section and can be parsed. /// - The CU lists reference existing compile units. /// - The buckets have a valid index, or they are empty. + /// - All names are reachable via the hash table (they have the correct hash, + /// and the hash is in the correct bucket). /// /// \param AccelSection section containing the acceleration table /// \param StrData string section Index: llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ llvm/trunk/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFVerifier.h" -#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" @@ -16,6 +15,7 @@ #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/Support/DJB.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -826,6 +826,119 @@ return NumErrors; } +unsigned +DWARFVerifier::verifyNameIndexBuckets(const DWARFDebugNames::NameIndex &NI, + const DataExtractor &StrData) { + struct BucketInfo { + uint32_t Bucket; + uint32_t Index; + + constexpr BucketInfo(uint32_t Bucket, uint32_t Index) + : Bucket(Bucket), Index(Index) {} + bool operator<(const BucketInfo &RHS) const { return Index < RHS.Index; }; + }; + + uint32_t NumErrors = 0; + if (NI.getBucketCount() == 0) { + warn() << formatv("Name Index @ {0:x} does not contain a hash table.\n", + NI.getUnitOffset()); + return NumErrors; + } + + // Build up a list of (Bucket, Index) pairs. We use this later to verify that + // each Name is reachable from the appropriate bucket. + std::vector<BucketInfo> BucketStarts; + BucketStarts.reserve(NI.getBucketCount() + 1); + for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; ++Bucket) { + uint32_t Index = NI.getBucketArrayEntry(Bucket); + if (Index > NI.getNameCount()) { + error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid " + "value {2}. Valid range is [0, {3}].\n", + Bucket, NI.getUnitOffset(), Index, NI.getNameCount()); + ++NumErrors; + continue; + } + if (Index > 0) + BucketStarts.emplace_back(Bucket, Index); + } + + // If there were any buckets with invalid values, skip further checks as they + // will likely produce many errors which will only confuse the actual root + // problem. + if (NumErrors > 0) + return NumErrors; + + // Sort the list in the order of increasing "Index" entries. + array_pod_sort(BucketStarts.begin(), BucketStarts.end()); + + // Insert a sentinel entry at the end, so we can check that the end of the + // table is covered in the loop below. + BucketStarts.emplace_back(NI.getBucketCount(), NI.getNameCount() + 1); + + // Loop invariant: NextUncovered is the (1-based) index of the first Name + // which is not reachable by any of the buckets we processed so far (and + // hasn't been reported as uncovered). + uint32_t NextUncovered = 1; + for (const BucketInfo &B : BucketStarts) { + // Under normal circumstances B.Index be equal to NextUncovered, but it can + // be less if a bucket points to names which are already known to be in some + // bucket we processed earlier. In that case, we won't trigger this error, + // but report the mismatched hash value error instead. (We know the hash + // will not match because we have already verified that the name's hash + // puts it into the previous bucket.) + if (B.Index > NextUncovered) { + error() << formatv("Name Index @ {0:x}: Name table entries [{1}, {2}] " + "are not covered by the hash table.\n", + NI.getUnitOffset(), NextUncovered, B.Index - 1); + ++NumErrors; + } + uint32_t Idx = B.Index; + + // The rest of the checks apply only to non-sentinel entries. + if (B.Bucket == NI.getBucketCount()) + break; + + // This triggers if a non-empty bucket points to a name with a mismatched + // hash. Clients are likely to interpret this as an empty bucket, because a + // mismatched hash signals the end of a bucket, but if this is indeed an + // empty bucket, the producer should have signalled this by marking the + // bucket as empty. + uint32_t FirstHash = NI.getHashArrayEntry(Idx); + if (FirstHash % NI.getBucketCount() != B.Bucket) { + error() << formatv( + "Name Index @ {0:x}: Bucket {1} is not empty but points to a " + "mismatched hash value {2:x} (belonging to bucket {3}).\n", + NI.getUnitOffset(), B.Bucket, FirstHash, + FirstHash % NI.getBucketCount()); + ++NumErrors; + } + + // This find the end of this bucket and also verifies that all the hashes in + // this bucket are correct by comparing the stored hashes to the ones we + // compute ourselves. + while (Idx <= NI.getNameCount()) { + uint32_t Hash = NI.getHashArrayEntry(Idx); + if (Hash % NI.getBucketCount() != B.Bucket) + break; + + auto NTE = NI.getNameTableEntry(Idx); + const char *Str = StrData.getCStr(&NTE.StringOffset); + if (caseFoldingDjbHash(Str) != Hash) { + error() << formatv("Name Index @ {0:x}: String ({1}) at index {2} " + "hashes to {3:x}, but " + "the Name Index hash is {4:x}\n", + NI.getUnitOffset(), Str, Idx, + caseFoldingDjbHash(Str), Hash); + ++NumErrors; + } + + ++Idx; + } + NextUncovered = std::max(NextUncovered, Idx); + } + return NumErrors; +} + unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection, const DataExtractor &StrData) { unsigned NumErrors = 0; @@ -843,20 +956,8 @@ } NumErrors += verifyDebugNamesCULists(AccelTable); - - for (const DWARFDebugNames::NameIndex &NI : AccelTable) { - for (uint32_t Bucket = 0, End = NI.getBucketCount(); Bucket < End; - ++Bucket) { - uint32_t Index = NI.getBucketArrayEntry(Bucket); - if (Index > NI.getNameCount()) { - error() << formatv("Bucket {0} of Name Index @ {1:x} contains invalid " - "value {2}. Valid range is [0, {3}].\n", - Bucket, NI.getUnitOffset(), Index, - NI.getNameCount()); - ++NumErrors; - } - } - } + for (const auto &NI : AccelTable) + NumErrors += verifyNameIndexBuckets(NI, StrData); return NumErrors; } Index: llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s =================================================================== --- llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s +++ llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-no-buckets.s @@ -0,0 +1,80 @@ +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \ +# RUN: llvm-dwarfdump -verify - | FileCheck %s + +# CHECK: warning: Name Index @ 0x0 does not contain a hash table. + .section .debug_str,"MS",@progbits,1 +.Lstring_foo: + .asciz "foo" +.Lstring_producer: + .asciz "Hand-written dwarf" + + .section .debug_abbrev,"",@progbits +.Lsection_abbrev: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .Lsection_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lstring_producer # DW_AT_producer + .short 12 # DW_AT_language +.Ldie_foo: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_foo # DW_AT_name + # DW_AT_external + .byte 0 # End Of Children Mark +.Lcu_end0: + + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: contribution length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 0 # Header: bucket count + .long 1 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 + .long .Lstring_foo # String in Bucket 1: foo + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 1 +.Lnames_abbrev_start0: + .byte 46 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 6 # DW_FORM_data4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames0: + .byte 46 # Abbrev code + .long .Ldie_foo-.Lcu_begin0 # DW_IDX_die_offset + .long 0 # End of list: foo + .p2align 2 +.Lnames_end0: Index: llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-unhashed-names.s =================================================================== --- llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-unhashed-names.s +++ llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-unhashed-names.s @@ -0,0 +1,123 @@ +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \ +# RUN: not llvm-dwarfdump -verify - | FileCheck %s + +# CHECK: Name Index @ 0x0: Name table entries [1, 1] are not covered by the hash table. +# CHECK: Name Index @ 0x0: Name table entries [4, 4] are not covered by the hash table. + .section .debug_str,"MS",@progbits,1 +.Lstring_foo: + .asciz "foo" +.Lstring_bar: + .asciz "bar" +.Lstring_baz: + .asciz "baz" +.Lstring_barfuz: + .asciz "barfuz" +.Lstring_producer: + .asciz "Hand-written dwarf" + + .section .debug_abbrev,"",@progbits +.Lsection_abbrev: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .Lsection_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lstring_producer # DW_AT_producer + .short 12 # DW_AT_language +.Ldie_foo: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_foo # DW_AT_name + # DW_AT_external +.Ldie_bar: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_bar # DW_AT_name + # DW_AT_external +.Ldie_baz: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_baz # DW_AT_name + # DW_AT_external +.Ldie_barfuz: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_barfuz # DW_AT_name + # DW_AT_external + .byte 0 # End Of Children Mark +.Lcu_end0: + + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: contribution length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 2 # Header: bucket count + .long 4 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 + .long 2 # Bucket 0 + .long 3 # Bucket 1 + .long 193491849 # Hash in no Bucket + .long 193487034 # Hash in Bucket 0 + .long 4086570991 # Hash in Bucket 1 + .long 193487042 # Hash in no Bucket + .long .Lstring_foo # String in no Bucket + .long .Lstring_bar # String in Bucket 0 + .long .Lstring_barfuz # String in Bucket 1 + .long .Lstring_baz # String in no Bucket + .long .Lnames0-.Lnames_entries0 # Offset in no Bucket + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 0 + .long .Lnames2-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames3-.Lnames_entries0 # Offset in no Bucket +.Lnames_abbrev_start0: + .byte 46 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 6 # DW_FORM_data4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames0: + .byte 46 # Abbrev code + .long .Ldie_foo-.Lcu_begin0 # DW_IDX_die_offset + .long 0 # End of list: foo +.Lnames1: + .byte 46 # Abbrev code + .long .Ldie_bar-.Lcu_begin0 # DW_IDX_die_offset + .long 0 # End of list: bar +.Lnames2: + .byte 46 # Abbrev code + .long .Ldie_baz-.Lcu_begin0 # DW_IDX_die_offset + .long 0 # End of list: baz +.Lnames3: + .byte 46 # Abbrev code + .long .Ldie_barfuz-.Lcu_begin0# DW_IDX_die_offset + .long 0 # End of list: barfuz + .p2align 2 +.Lnames_end0: Index: llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-wrong-hash.s =================================================================== --- llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-wrong-hash.s +++ llvm/trunk/test/tools/llvm-dwarfdump/X86/debug-names-verify-wrong-hash.s @@ -0,0 +1,97 @@ +# RUN: llvm-mc -triple x86_64-pc-linux %s -filetype=obj | \ +# RUN: not llvm-dwarfdump -verify - | FileCheck %s + +# CHECK: Name Index @ 0x0: String (baz) at index 2 hashes to 0xb8860c2, but the Name Index hash is 0xb8860c4 +# CHECK: Name Index @ 0x0: Bucket 1 is not empty but points to a mismatched hash value 0xb8860c4 (belonging to bucket 0). + .section .debug_str,"MS",@progbits,1 +.Lstring_bar: + .asciz "bar" +.Lstring_baz: + .asciz "baz" +.Lstring_producer: + .asciz "Hand-written dwarf" + + .section .debug_abbrev,"",@progbits +.Lsection_abbrev: + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Lcu_end0-.Lcu_start0 # Length of Unit +.Lcu_start0: + .short 4 # DWARF version number + .long .Lsection_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lstring_producer # DW_AT_producer + .short 12 # DW_AT_language +.Ldie_bar: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_bar # DW_AT_name + # DW_AT_external +.Ldie_baz: + .byte 2 # Abbrev [2] DW_TAG_subprogram + .long .Lstring_baz # DW_AT_name + # DW_AT_external + .byte 0 # End Of Children Mark +.Lcu_end0: + + .section .debug_names,"",@progbits + .long .Lnames_end0-.Lnames_start0 # Header: contribution length +.Lnames_start0: + .short 5 # Header: version + .short 0 # Header: padding + .long 1 # Header: compilation unit count + .long 0 # Header: local type unit count + .long 0 # Header: foreign type unit count + .long 2 # Header: bucket count + .long 2 # Header: name count + .long .Lnames_abbrev_end0-.Lnames_abbrev_start0 # Header: abbreviation table size + .long 0 # Header: augmentation length + .long .Lcu_begin0 # Compilation unit 0 + .long 1 # Bucket 0 + .long 2 # Bucket 1 + .long 193487034 # Hash in Bucket 1 + .long 193487044 # Hash in Bucket 1 and 2 + .long .Lstring_bar # String in Bucket 1: bar + .long .Lstring_baz # String in Bucket 1 and 2: baz + .long .Lnames0-.Lnames_entries0 # Offset in Bucket 1 + .long .Lnames1-.Lnames_entries0 # Offset in Bucket 1 and 2 +.Lnames_abbrev_start0: + .byte 46 # Abbrev code + .byte 46 # DW_TAG_subprogram + .byte 3 # DW_IDX_die_offset + .byte 6 # DW_FORM_data4 + .byte 0 # End of abbrev + .byte 0 # End of abbrev + .byte 0 # End of abbrev list +.Lnames_abbrev_end0: +.Lnames_entries0: +.Lnames0: + .byte 46 # Abbrev code + .long .Ldie_bar-.Lcu_begin0 # DW_IDX_die_offset + .long 0 # End of list: bar +.Lnames1: + .byte 46 # Abbrev code + .long .Ldie_baz-.Lcu_begin0 # DW_IDX_die_offset + .long 0 # End of list: baz + .p2align 2 +.Lnames_end0: