diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h --- a/bolt/include/bolt/Core/DebugData.h +++ b/bolt/include/bolt/Core/DebugData.h @@ -715,8 +715,11 @@ std::unique_ptr Buffer; std::unique_ptr Stream; }; - /// Map original unit abbrev offset to abbreviations data. - std::map UnitsAbbrevData; + /// Map original unit to abbreviations data. + std::unordered_map UnitsAbbrevData; + + /// Map from Hash Signature to AbbrevData. + llvm::StringMap> AbbrevDataCache; /// Attributes substitution (patch) information. struct PatchInfo { @@ -783,10 +786,8 @@ /// Return an offset in the finalized abbrev section corresponding to CU/TU. uint64_t getAbbreviationsOffsetForUnit(const DWARFUnit &Unit) { assert(!DWOId && "offsets are tracked for non-DWO units only"); - assert(UnitsAbbrevData.find(Unit.getAbbreviationsOffset()) != - UnitsAbbrevData.end() && - "no abbrev data found for unit"); - return UnitsAbbrevData[Unit.getAbbreviationsOffset()].Offset; + assert(UnitsAbbrevData.count(&Unit) && "no abbrev data found for unit"); + return UnitsAbbrevData[&Unit]->Offset; } }; diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp --- a/bolt/lib/Core/DebugData.cpp +++ b/bolt/lib/Core/DebugData.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/SHA1.h" #include #include #include @@ -694,20 +695,30 @@ if (!Abbrevs) return; - // Multiple units may share the same abbreviations. Only add abbreviations - // for the first unit and reuse them. - const uint64_t AbbrevOffset = Unit.getAbbreviationsOffset(); - if (UnitsAbbrevData.find(AbbrevOffset) != UnitsAbbrevData.end()) - return; + const PatchesTy &UnitPatches = Patches[&Unit]; - AbbrevData &UnitData = UnitsAbbrevData[AbbrevOffset]; + // We are duplicating abbrev sections, to handle the case where for one CU we + // modify it, but for another we don't. + auto UnitDataPtr = std::make_unique(); + AbbrevData &UnitData = *UnitDataPtr.get(); UnitData.Buffer = std::make_unique(); UnitData.Stream = std::make_unique(*UnitData.Buffer); - - const PatchesTy &UnitPatches = Patches[&Unit]; - raw_svector_ostream &OS = *UnitData.Stream.get(); + // Returns true if AbbrevData is re-used, false otherwise. + auto hashAndAddAbbrev = [&](StringRef AbbrevData) -> bool { + llvm::SHA1 Hasher; + Hasher.update(AbbrevData); + StringRef Key = Hasher.final(); + auto Iter = AbbrevDataCache.find(Key); + if (Iter != AbbrevDataCache.end()) { + UnitsAbbrevData[&Unit] = Iter->second.get(); + return true; + } + AbbrevDataCache[Key] = std::move(UnitDataPtr); + UnitsAbbrevData[&Unit] = &UnitData; + return false; + }; // Take a fast path if there are no patches to apply. Simply copy the original // contents. if (UnitPatches.empty()) { @@ -756,9 +767,10 @@ AbbrevContents = AbbrevSectionContents; } - OS.reserveExtraSpace(AbbrevContents.size()); - OS << AbbrevContents; - + if (!hashAndAddAbbrev(AbbrevContents)) { + OS.reserveExtraSpace(AbbrevContents.size()); + OS << AbbrevContents; + } return; } @@ -798,9 +810,13 @@ encodeULEB128(0, OS); } encodeULEB128(0, OS); + + hashAndAddAbbrev(OS.str()); } std::unique_ptr DebugAbbrevWriter::finalize() { + // Used to create determinism for writing out abbrevs. + std::vector Abbrevs; if (DWOId) { // We expect abbrev_offset to always be zero for DWO units as there // should be one CU per DWO, and TUs should share the same abbreviation @@ -818,33 +834,40 @@ } } + DWARFUnit *Unit = Context.getDWOCompileUnitForHash(*DWOId); // Issue abbreviations for the DWO CU only. - addUnitAbbreviations(*Context.getDWOCompileUnitForHash(*DWOId)); + addUnitAbbreviations(*Unit); + AbbrevData *Abbrev = UnitsAbbrevData[Unit]; + Abbrevs.push_back(Abbrev); } else { + Abbrevs.reserve(Context.getNumCompileUnits() + Context.getNumTypeUnits()); + std::unordered_set ProcessedAbbrevs; // Add abbreviations from compile and type non-DWO units. - for (const std::unique_ptr &Unit : Context.normal_units()) + for (const std::unique_ptr &Unit : Context.normal_units()) { addUnitAbbreviations(*Unit); + AbbrevData *Abbrev = UnitsAbbrevData[Unit.get()]; + if (!ProcessedAbbrevs.insert(Abbrev).second) + continue; + Abbrevs.push_back(Abbrev); + } } DebugBufferVector ReturnBuffer; - // Pre-calculate the total size of abbrev section. uint64_t Size = 0; - for (const auto &KV : UnitsAbbrevData) { - const AbbrevData &UnitData = KV.second; - Size += UnitData.Buffer->size(); - } + for (const AbbrevData *UnitData : Abbrevs) + Size += UnitData->Buffer->size(); + ReturnBuffer.reserve(Size); uint64_t Pos = 0; - for (auto &KV : UnitsAbbrevData) { - AbbrevData &UnitData = KV.second; - ReturnBuffer.append(*UnitData.Buffer); - UnitData.Offset = Pos; - Pos += UnitData.Buffer->size(); - - UnitData.Buffer.reset(); - UnitData.Stream.reset(); + for (AbbrevData *UnitData : Abbrevs) { + ReturnBuffer.append(*UnitData->Buffer); + UnitData->Offset = Pos; + Pos += UnitData->Buffer->size(); + + UnitData->Buffer.reset(); + UnitData->Stream.reset(); } return std::make_unique(ReturnBuffer); diff --git a/bolt/test/X86/shared-abbrev.s b/bolt/test/X86/shared-abbrev.s new file mode 100644 --- /dev/null +++ b/bolt/test/X86/shared-abbrev.s @@ -0,0 +1,121 @@ +# RUN: rm -rf %t +# RUN: mkdir %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 -dwarf-version=4 %s -o %t/shared-abbrev.o +# RUN: %clang %cflags %t/shared-abbrev.o -o %t/shared-abbrev.exe +# RUN: llvm-bolt %t/shared-abbrev.exe -o %t/shared-abbrev.exe.bolt -update-debug-sections +# RUN: llvm-dwarfdump --debug-info %t/shared-abbrev.exe.bolt | FileCheck %s + +# CHECK: 0x00000000: +# CHECK-SAME: abbr_offset = 0x0000 +# CHECK-EMPTY: +# CHECK-NEXT: DW_TAG_compile_unit +# CHECK-NEXT: DW_AT_stmt_list +# CHECK-NEXT: DW_AT_low_pc +# CHECK-NEXT: DW_AT_ranges +# CHECK: 0x0000001d: +# CHECK-SAME: abbr_offset = 0x0017 +# CHECK-EMPTY: +# CHECK: DW_TAG_compile_unit +# CHECK-NEXT: DW_AT_stmt_list +# CHECK-NEXT: DW_AT_low_pc +# CHECK-NEXT: DW_AT_ranges +# CHECK: 0x0000003a: +# CHECK-SAME: abbr_offset = 0x0000 +# CHECK-EMPTY: +# CHECK-NEXT: DW_TAG_compile_unit +# CHECK-NEXT: DW_AT_stmt_list +# CHECK-NEXT: DW_AT_low_pc +# CHECK-NEXT: DW_AT_ranges + + .text + .file "main.cpp" + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin0: + .file 1 "test" "main.cpp" + .loc 1 1 0 # main.cpp:1:0 + .cfi_startproc + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 7 # DW_FORM_data8 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 2 # Abbrev [2] DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .quad 0 # DW_AT_low_pc + .byte 0 # End Of Children Mark + .long .Ldebug_ranges0 # DW_AT_ranges --- end manual -- +.Ldebug_info_end0: + + # Second CU table. + .long .Ldebug_info_end1-.Ldebug_info_start1 # Length of Unit +.Ldebug_info_start1: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 1 # Abbrev [1] DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .quad .Lfunc_begin0 # DW_AT_low_pc + .quad .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 0 # End Of Children Mark +.Ldebug_info_end1: + + # Third CU table. + .long .Ldebug_info_end2-.Ldebug_info_start2 # Length of Unit +.Ldebug_info_start2: + .short 4 # DWARF version number + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .byte 2 # Abbrev [2] DW_TAG_compile_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .quad 0 # DW_AT_low_pc + .byte 0 # End Of Children Mark + .long .Ldebug_ranges0 # DW_AT_ranges --- end manual -- +.Ldebug_info_end2: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad 0 + .quad 0 + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: