diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -9,6 +9,7 @@
   UnwindInfoSection.cpp
   Driver.cpp
   DriverUtils.cpp
+  Dwarf.cpp
   ExportTrie.cpp
   InputFiles.cpp
   InputSection.cpp
diff --git a/lld/MachO/Dwarf.h b/lld/MachO/Dwarf.h
new file mode 100644
--- /dev/null
+++ b/lld/MachO/Dwarf.h
@@ -0,0 +1,57 @@
+//===- Dwarf.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_DWARF_H
+#define LLD_MACHO_DWARF_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DWARF/DWARFObject.h"
+
+#include <memory>
+
+namespace lld {
+namespace macho {
+
+class ObjFile;
+
+// Implements the interface between LLVM's DWARF-parsing utilities and LLD's
+// InputSection structures.
+class DwarfObject final : public llvm::DWARFObject {
+public:
+  bool isLittleEndian() const override { return true; }
+
+  llvm::Optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &sec,
+                                            uint64_t pos) const override {
+    // TODO: implement this
+    return llvm::None;
+  }
+
+  void forEachInfoSections(
+      llvm::function_ref<void(const llvm::DWARFSection &)> f) const override {
+    f(infoSection);
+  }
+
+  llvm::StringRef getAbbrevSection() const override { return abbrevSection; }
+  llvm::StringRef getStrSection() const override { return strSection; }
+
+  // Returns an instance of DwarfObject if the given object file has the
+  // relevant DWARF debug sections.
+  static std::unique_ptr<DwarfObject> create(ObjFile *);
+
+private:
+  // Non-owning views of the input file's __debug_info, __debug_abbrev and
+  // __debug_str section contents; populated by create().
+  llvm::DWARFSection infoSection;
+  llvm::StringRef abbrevSection;
+  llvm::StringRef strSection;
+};
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/Dwarf.cpp b/lld/MachO/Dwarf.cpp
new file mode 100644
--- /dev/null
+++ b/lld/MachO/Dwarf.cpp
@@ -0,0 +1,50 @@
+//===- Dwarf.cpp ----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Dwarf.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "OutputSegment.h"
+
+#include <memory>
+
+using namespace lld;
+using namespace lld::macho;
+using namespace llvm;
+
+std::unique_ptr<DwarfObject> DwarfObject::create(ObjFile *obj) {
+  auto dObj = std::make_unique<DwarfObject>();
+  bool hasDwarfInfo = false;
+  for (const SubsectionMap &subsecMap : obj->subsections) {
+    for (const auto &entry : subsecMap) {
+      InputSection *isec = entry.second;
+      // Only S_ATTR_DEBUG sections that live in the __DWARF segment matter.
+      if (!(isDebugSection(isec->flags) &&
+            isec->segname == segment_names::dwarf))
+        continue;
+
+      if (isec->name == "__debug_info") {
+        dObj->infoSection.Data = toStringRef(isec->data);
+        hasDwarfInfo = true;
+        continue;
+      }
+
+      if (StringRef *s = StringSwitch<StringRef *>(isec->name)
+                             .Case("__debug_abbrev", &dObj->abbrevSection)
+                             .Case("__debug_str", &dObj->strSection)
+                             .Default(nullptr)) {
+        *s = toStringRef(isec->data);
+        hasDwarfInfo = true;
+      }
+    }
+  }
+
+  if (hasDwarfInfo)
+    return dObj;
+  return nullptr;
+}
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -15,6 +15,7 @@
 #include "lld/Common/Memory.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/TextAPI/MachO/InterfaceFile.h"
@@ -91,6 +92,13 @@
 public:
   explicit ObjFile(MemoryBufferRef mb);
   static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
+
+  // The DWARF compile unit parsed from this file's debug sections, or null
+  // if parseDebugInfo() found none.
+  llvm::DWARFUnit *compileUnit = nullptr;
+
+private:
+  void parseDebugInfo();
 };
 
 // command-line -sectcreate file
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -44,6 +44,7 @@
 #include "InputFiles.h"
 #include "Config.h"
 #include "Driver.h"
+#include "Dwarf.h"
 #include "ExportTrie.h"
 #include "InputSection.h"
 #include "MachOStructs.h"
@@ -54,6 +55,7 @@
 #include "Symbols.h"
 #include "Target.h"
 
+#include "lld/Common/DWARF.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Reproduce.h"
@@ -387,6 +389,35 @@
   // parsed all the symbols.
   for (size_t i = 0, n = subsections.size(); i < n; ++i)
     parseRelocations(sectionHeaders[i], subsections[i]);
+
+  parseDebugInfo();
+}
+
+// Records this file's DWARF compile unit in `compileUnit`, if it has one.
+// `compileUnit` stays null when the file carries no DWARF sections or when
+// no compile unit could be parsed out of them.
+void ObjFile::parseDebugInfo() {
+  std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this);
+  if (!dObj)
+    return;
+
+  auto *ctx = make<DWARFContext>(
+      std::move(dObj), "",
+      [&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
+      [&](Error warning) {
+        warn(getName() + ": " + toString(std::move(warning)));
+      });
+
+  // TODO: Since object files can contain a lot of DWARF info, we should verify
+  // that we are parsing just the info we need
+  const DWARFContext::compile_unit_range &units = ctx->compile_units();
+  auto it = units.begin();
+  // DwarfObject::create() succeeds even when only __debug_abbrev or
+  // __debug_str is present, so there may be no compile unit to read here.
+  if (it == units.end())
+    return;
+  compileUnit = it->get();
+  assert(std::next(it) == units.end());
 }
 
 // The path can point to either a dylib or a .tbd file.
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -35,15 +35,21 @@
   llvm::PointerUnion<Symbol *, InputSection *> referent;
 };
 
-inline bool isZeroFill(uint8_t flags) {
+inline bool isZeroFill(uint32_t flags) {
   return llvm::MachO::isVirtualSection(flags & llvm::MachO::SECTION_TYPE);
 }
 
-inline bool isThreadLocalVariables(uint8_t flags) {
+inline bool isThreadLocalVariables(uint32_t flags) {
   return (flags & llvm::MachO::SECTION_TYPE) ==
          llvm::MachO::S_THREAD_LOCAL_VARIABLES;
 }
 
+// Returns true if the user attribute bits of `flags` are exactly S_ATTR_DEBUG.
+inline bool isDebugSection(uint32_t flags) {
+  return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
+         llvm::MachO::S_ATTR_DEBUG;
+}
+
 class InputSection {
 public:
   virtual ~InputSection() = default;
diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h
--- a/lld/MachO/OutputSegment.h
+++ b/lld/MachO/OutputSegment.h
@@ -23,6 +23,7 @@
 constexpr const char linkEdit[] = "__LINKEDIT";
 constexpr const char dataConst[] = "__DATA_CONST";
 constexpr const char ld[] = "__LD"; // output only with -r
+constexpr const char dwarf[] = "__DWARF";
 
 } // namespace segment_names
 
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -20,6 +20,10 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Support/raw_ostream.h"
 
+namespace llvm {
+class DWARFUnit;
+} // namespace llvm
+
 namespace lld {
 namespace macho {
 
@@ -48,6 +52,7 @@
 class Defined;
 class DylibSymbol;
 class LoadCommand;
+class ObjFile;
 
 class SyntheticSection : public OutputSection {
 public:
@@ -405,16 +410,34 @@
   size_t strx;
 };
 
+// A pending STABS symbol. SymtabSection::writeTo() copies each field into the
+// nlist_64 field of the same name (n_type, n_strx, n_sect, n_desc, n_value).
+struct StabsEntry {
+  uint8_t type;
+  uint32_t strx = 0;
+  uint8_t sect = 0;
+  uint16_t desc = 0;
+  uint64_t value = 0;
+
+  explicit StabsEntry(uint8_t type) : type(type) {}
+};
+
 class SymtabSection : public LinkEditSection {
 public:
   SymtabSection(StringTableSection &);
   void finalizeContents();
-  size_t getNumSymbols() const { return symbols.size(); }
+  size_t getNumSymbols() const { return stabs.size() + symbols.size(); }
   uint64_t getRawSize() const override;
   void writeTo(uint8_t *buf) const override;
 
 private:
+  void emitBeginSourceStab(llvm::DWARFUnit *compileUnit);
+  void emitEndSourceStab();
+  void emitObjectFileStab(ObjFile *);
+  void emitFunStabs(Defined *);
+
   StringTableSection &stringTableSection;
+  std::vector<StabsEntry> stabs;
   std::vector<SymtabEntry> symbols;
 };
 
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -20,7 +20,9 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "llvm/Support/EndianStream.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/Path.h"
 
 using namespace llvm;
 using namespace llvm::support;
@@ -574,17 +576,107 @@
       stringTableSection(stringTableSection) {}
 
 uint64_t SymtabSection::getRawSize() const {
-  return symbols.size() * sizeof(structs::nlist_64);
+  return getNumSymbols() * sizeof(structs::nlist_64);
+}
+
+// Emits the N_SO stab that opens a compile unit's group; its string is the
+// unit's compilation directory joined with the unit DIE's short name.
+void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
+  StabsEntry stab(MachO::N_SO);
+  SmallString<261> dir(compileUnit->getCompilationDir());
+  StringRef sep = sys::path::get_separator();
+  // We don't use `path::append` here because we want an empty `dir` to result
+  // in an absolute path. `append` would give us a relative path for that case.
+  if (!dir.endswith(sep))
+    dir += sep;
+  stab.strx = stringTableSection.addString(
+      saver.save(dir + compileUnit->getUnitDIE().getShortName()));
+  stabs.emplace_back(std::move(stab));
+}
+
+// Emits the N_SO stab that closes a group: no string, n_sect set to 1.
+void SymtabSection::emitEndSourceStab() {
+  StabsEntry stab(MachO::N_SO);
+  stab.sect = 1;
+  stabs.emplace_back(std::move(stab));
+}
+
+// Emits the N_OSO stab carrying the absolute path of `file`. n_sect holds the
+// target's CPU subtype and n_desc is set to 1.
+void SymtabSection::emitObjectFileStab(ObjFile *file) {
+  StabsEntry stab(MachO::N_OSO);
+  stab.sect = target->cpuSubtype;
+  SmallString<261> path(file->getName());
+  std::error_code ec = sys::fs::make_absolute(path);
+  if (ec)
+    fatal("failed to get absolute path for " + file->getName());
+
+  stab.strx = stringTableSection.addString(saver.save(path.str()));
+  stab.desc = 1;
+  stabs.emplace_back(std::move(stab));
+}
+
+// Emits a pair of N_FUN stabs for `defined`: the first carries its name and
+// address, the second carries its size in n_value.
+void SymtabSection::emitFunStabs(Defined *defined) {
+  {
+    StabsEntry stab(MachO::N_FUN);
+    stab.sect = 1;
+    stab.strx = stringTableSection.addString(defined->getName());
+    stab.value = defined->getVA();
+    stabs.emplace_back(std::move(stab));
+  }
+
+  {
+    StabsEntry stab(MachO::N_FUN);
+    // FIXME this should be the size of the symbol. Using the section size in
+    // lieu is only correct if .subsections_via_symbols is set.
+    stab.value = defined->isec->getSize();
+    stabs.emplace_back(std::move(stab));
+  }
 }
 
 void SymtabSection::finalizeContents() {
-  // TODO support other symbol types
+  InputFile *lastFile = nullptr;
   for (Symbol *sym : symtab->getSymbols()) {
+    // TODO support other symbol types
     if (isa<Defined>(sym) || sym->isInGot() || sym->isInStubs()) {
       sym->symtabIndex = symbols.size();
       symbols.push_back({sym, stringTableSection.addString(sym->getName())});
     }
+
+    // Emit STABS symbols so that dsymutil and/or the debugger can map address
+    // regions in the final binary to the source and object files from which
+    // they originated.
+    if (auto *defined = dyn_cast<Defined>(sym)) {
+      if (defined->isAbsolute())
+        continue;
+
+      InputSection *isec = defined->isec;
+      // XXX is it right to assume that all symbols in __text are function
+      // symbols?
+      if (isec->name == "__text") {
+        // A section not owned by an ObjFile has no compile unit to describe.
+        auto *file = dyn_cast<ObjFile>(isec->file);
+        if (!file || !file->compileUnit)
+          continue;
+
+        if (lastFile != file) {
+          if (lastFile != nullptr)
+            emitEndSourceStab();
+          lastFile = file;
+
+          emitBeginSourceStab(file->compileUnit);
+          emitObjectFileStab(file);
+        }
+        emitFunStabs(defined);
+      }
+      // TODO emit stabs for non-function symbols too
+    }
   }
+
+  if (!stabs.empty())
+    emitEndSourceStab();
 }
 
 void SymtabSection::writeTo(uint8_t *buf) const {
@@ -602,12 +694,23 @@
         nList->n_type = MachO::N_EXT | MachO::N_SECT;
         nList->n_sect = defined->isec->parent->index;
         // For the N_SECT symbol type, n_value is the address of the symbol
-        nList->n_value = defined->value + defined->isec->getVA();
+        nList->n_value = defined->getVA();
       }
       nList->n_desc |= defined->isWeakDef() ? MachO::N_WEAK_DEF : 0;
     }
     ++nList;
   }
+
+  // Emit the stabs entries after the "real" symbols. We cannot emit them
+  // before as that would render Symbol::symtabIndex inaccurate.
+  for (const StabsEntry &entry : stabs) {
+    nList->n_strx = entry.strx;
+    nList->n_type = entry.type;
+    nList->n_sect = entry.sect;
+    nList->n_desc = entry.desc;
+    nList->n_value = entry.value;
+    ++nList;
+  }
 }
 
 IndirectSymtabSection::IndirectSymtabSection()
@@ -656,7 +759,7 @@
 
 uint32_t StringTableSection::addString(StringRef str) {
   uint32_t strx = size;
-  strings.push_back(str);
+  strings.push_back(str); // TODO: consider deduplicating strings
   size += str.size() + 1; // account for null terminator
   return strx;
 }
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -578,6 +578,10 @@
   MapVector<std::pair<StringRef, StringRef>, MergedOutputSection *>
       mergedOutputSections;
   for (InputSection *isec : inputSections) {
+    // Instead of emitting DWARF sections, we emit STABS symbols to the object
+    // files that contain them.
+    if (isDebugSection(isec->flags) && isec->segname == segment_names::dwarf)
+      continue;
     MergedOutputSection *&osec =
         mergedOutputSections[{isec->segname, isec->name}];
     if (osec == nullptr)
@@ -591,8 +595,9 @@
     if (unwindInfoSection && segname == segment_names::ld) {
       assert(osec->name == section_names::compactUnwind);
       unwindInfoSection->setCompactUnwindSection(osec);
-    } else
+    } else {
       getOrCreateOutputSegment(segname)->addOutputSection(osec);
+    }
   }
 
   for (SyntheticSection *ssec : syntheticSections) {
diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
new file mode 100644
--- /dev/null
+++ b/lld/test/MachO/stabs.s
@@ -0,0 +1,114 @@
+# REQUIRES: x86
+# UNSUPPORTED: system-windows
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
+
+# RUN: %lld -lSystem %t/test.o %t/foo.o -o %t/test
+# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t
+
+## Check that we emit absolute paths to the object files in our OSO entries
+## even if our inputs are relative paths.
+# RUN: cd %t && %lld -lSystem test.o foo.o -o test
+# RUN: llvm-nm -pa %t/test | FileCheck %s -DDIR=%t
+
+# CHECK-DAG: [[#%x, MAIN:]] T _main
+# CHECK-DAG: [[#%x, FOO: ]] T _foo
+# CHECK: 0000000000000000 - 00 0000 SO /tmp/test.cpp
+# CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/test.o
+# CHECK-NEXT: [[#MAIN]] - 01 0000 FUN _main
+# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
+# CHECK-NEXT: 0000000000000000 - 01 0000 SO
+# CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp
+# CHECK-NEXT: 0000000000000000 - 03 0001 OSO [[DIR]]/foo.o
+# CHECK-NEXT: [[#FOO]] - 01 0000 FUN _foo
+# CHECK-NEXT: 0000000000000001 - 00 0000 FUN
+# CHECK-NEXT: 0000000000000000 - 01 0000 SO
+
+#--- test.s
+.text
+.globl _main
+_main:
+Lfunc_begin0:
+  retq
+Lfunc_end0:
+
+.section __DWARF,__debug_str,regular,debug
+  .asciz "test.cpp" ## string offset=0
+  .asciz "/tmp" ## string offset=9
+.section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+  .byte 1 ## Abbreviation Code
+  .byte 17 ## DW_TAG_compile_unit
+  .byte 1 ## DW_CHILDREN_yes
+  .byte 3 ## DW_AT_name
+  .byte 14 ## DW_FORM_strp
+  .byte 27 ## DW_AT_comp_dir
+  .byte 14 ## DW_FORM_strp
+  .byte 17 ## DW_AT_low_pc
+  .byte 1 ## DW_FORM_addr
+  .byte 18 ## DW_AT_high_pc
+  .byte 6 ## DW_FORM_data4
+  .byte 0 ## EOM(1)
+.section __DWARF,__debug_info,regular,debug
+.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
+  .long Lset0
+Ldebug_info_start0:
+  .short 4 ## DWARF version number
+.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
+  .long Lset1
+  .byte 8 ## Address Size (in bytes)
+  .byte 1 ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
+  .long 0 ## DW_AT_name
+  .long 9 ## DW_AT_comp_dir
+  .quad Lfunc_begin0 ## DW_AT_low_pc
+.set Lset3, Lfunc_end0-Lfunc_begin0 ## DW_AT_high_pc
+  .long Lset3
+  .byte 0 ## End Of Children Mark
+Ldebug_info_end0:
+.subsections_via_symbols
+.section __DWARF,__debug_line,regular,debug
+
+#--- foo.s
+.text
+.globl _foo
+_foo:
+Lfunc_begin0:
+  retq
+Lfunc_end0:
+
+.section __DWARF,__debug_str,regular,debug
+  .asciz "foo.cpp" ## string offset=0
+  .asciz "" ## string offset=8
+.section __DWARF,__debug_abbrev,regular,debug
+Lsection_abbrev:
+  .byte 1 ## Abbreviation Code
+  .byte 17 ## DW_TAG_compile_unit
+  .byte 1 ## DW_CHILDREN_yes
+  .byte 3 ## DW_AT_name
+  .byte 14 ## DW_FORM_strp
+  .byte 27 ## DW_AT_comp_dir
+  .byte 14 ## DW_FORM_strp
+  .byte 17 ## DW_AT_low_pc
+  .byte 1 ## DW_FORM_addr
+  .byte 18 ## DW_AT_high_pc
+  .byte 6 ## DW_FORM_data4
+  .byte 0 ## EOM(1)
+.section __DWARF,__debug_info,regular,debug
+.set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
+  .long Lset0
+Ldebug_info_start0:
+  .short 4 ## DWARF version number
+.set Lset1, Lsection_abbrev-Lsection_abbrev ## Offset Into Abbrev. Section
+  .long Lset1
+  .byte 8 ## Address Size (in bytes)
+  .byte 1 ## Abbrev [1] 0xb:0x48 DW_TAG_compile_unit
+  .long 0 ## DW_AT_name
+  .long 8 ## DW_AT_comp_dir
+  .quad Lfunc_begin0 ## DW_AT_low_pc
+.set Lset3, Lfunc_end0-Lfunc_begin0 ## DW_AT_high_pc
+  .long Lset3
+  .byte 0 ## End Of Children Mark
+Ldebug_info_end0:
+.subsections_via_symbols
+.section __DWARF,__debug_line,regular,debug