diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -198,8 +198,8 @@ static InputSection *findContainingSubsection(SubsectionMap &map, uint64_t *offset) { auto it = std::prev(llvm::upper_bound( - map, *offset, [](uint64_t value, SubsectionEntry subsectionEntry) { - return value < subsectionEntry.offset; + map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) { + return value < subsecEntry.offset; })); *offset -= it->offset; return it->isec; @@ -423,96 +423,79 @@ void ObjFile::parseSymbols(ArrayRef sectionHeaders, ArrayRef nList, const char *strtab, bool subsectionsViaSymbols) { - using Section = typename LP::section; using NList = typename LP::nlist; - // Precompute the boundaries of symbols within a section. - // If subsectionsViaSymbols is True then the corresponding subsections will be - // created, otherwise these boundaries are used for the calculation of symbols - // sizes only. - for (const NList &sym : nList) { - if ((sym.n_type & N_TYPE) == N_SECT && !(sym.n_desc & N_ALT_ENTRY) && - !subsections[sym.n_sect - 1].empty()) { - SubsectionMap &subsecMapping = subsections[sym.n_sect - 1]; - subsecMapping.push_back( - {sym.n_value - sectionHeaders[sym.n_sect - 1].addr, - subsecMapping.front().isec}); - } - } - - for (SubsectionMap &subsecMap : subsections) { - if (subsecMap.empty()) - continue; - llvm::sort(subsecMap, - [](const SubsectionEntry &lhs, const SubsectionEntry &rhs) { - return lhs.offset < rhs.offset; - }); - subsecMap.erase( - std::unique(subsecMap.begin(), subsecMap.end(), - [](const SubsectionEntry &lhs, const SubsectionEntry &rhs) { - return lhs.offset == rhs.offset; - }), - subsecMap.end()); - if (!subsectionsViaSymbols) - continue; - for (size_t i = 0; i < subsecMap.size(); ++i) { - uint32_t offset = subsecMap[i].offset; - InputSection *&isec = subsecMap[i].isec; - uint32_t end = i + 1 < subsecMap.size() ? subsecMap[i + 1].offset - : isec->data.size(); - isec = make(*isec); - isec->data = isec->data.slice(offset, end - offset); - // TODO: ld64 appears to preserve the original alignment as well as each - // subsection's offset from the last aligned address. We should consider - // emulating that behavior. - isec->align = MinAlign(isec->align, offset); - } - } - + // Groups indices of the symbols by the sections that contain them. + std::vector> symbolsBySection(subsections.size()); symbols.resize(nList.size()); - for (size_t i = 0, n = nList.size(); i < n; ++i) { + for (uint32_t i = 0; i < nList.size(); ++i) { const NList &sym = nList[i]; StringRef name = strtab + sym.n_strx; - - if ((sym.n_type & N_TYPE) != N_SECT) { + if ((sym.n_type & N_TYPE) == N_SECT) { + SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; + // parseSections() may have chosen not to parse this section. + if (subsecMap.empty()) + continue; + symbolsBySection[sym.n_sect - 1].push_back(i); + } else { symbols[i] = parseNonSectionSymbol(sym, name); - continue; } + } - const Section &sec = sectionHeaders[sym.n_sect - 1]; - SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; - - // parseSections() may have chosen not to parse this section. + // Calculate symbol sizes and create subsections by splitting the sections + // along symbol boundaries. + for (size_t i = 0; i < subsections.size(); ++i) { + SubsectionMap &subsecMap = subsections[i]; if (subsecMap.empty()) continue; - uint64_t offset = sym.n_value - sec.addr; - - auto it = llvm::upper_bound( - subsecMap, offset, [](uint64_t value, SubsectionEntry subsectionEntry) { - return value < subsectionEntry.offset; - }); - uint32_t size = it != subsecMap.end() - ? it->offset - offset - : subsecMap.front().isec->getSize() - offset; - - // If the input file does not use subsections-via-symbols, all symbols can - // use the same subsection. Otherwise, we must split the sections along - // symbol boundaries. - if (!subsectionsViaSymbols) { - symbols[i] = - createDefined(sym, name, subsecMap.front().isec, offset, size); - continue; - } + std::vector &symbolIndices = symbolsBySection[i]; + llvm::sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { + return nList[lhs].n_value < nList[rhs].n_value; + }); + uint64_t sectionAddr = sectionHeaders[i].addr; + + // We populate subsecMap by repeatedly splitting the last (highest address) + // subsection. + SubsectionEntry subsecEntry = subsecMap.back(); + for (size_t j = 0; j < symbolIndices.size(); ++j) { + uint32_t symIndex = symbolIndices[j]; + const NList &sym = nList[symIndex]; + StringRef name = strtab + sym.n_strx; + InputSection *isec = subsecEntry.isec; + + uint64_t subsecAddr = sectionAddr + subsecEntry.offset; + uint64_t symbolOffset = sym.n_value - subsecAddr; + uint64_t symbolSize = + j + 1 < symbolIndices.size() + ? nList[symbolIndices[j + 1]].n_value - sym.n_value + : isec->data.size() - symbolOffset; + // There are 3 cases where we do not need to create a new subsection: + // 1. If the input file does not use subsections-via-symbols. + // 2. Multiple symbols at the same address only induce one subsection. + // 3. Alternative entry points do not induce new subsections. + if (!subsectionsViaSymbols || symbolOffset == 0 || + sym.n_desc & N_ALT_ENTRY) { + symbols[symIndex] = + createDefined(sym, name, isec, symbolOffset, symbolSize); + continue; + } - InputSection *subsec = (--it)->isec; - symbols[i] = createDefined(sym, name, subsec, offset - it->offset, size); - } + auto *nextIsec = make(*isec); + nextIsec->data = isec->data.slice(symbolOffset); + isec->data = isec->data.slice(0, symbolOffset); - if (!subsectionsViaSymbols) - for (SubsectionMap &subsecMap : subsections) - if (!subsecMap.empty()) - subsecMap = {subsecMap.front()}; + // By construction, the symbol will be at offset zero in the new section. + symbols[symIndex] = + createDefined(sym, name, nextIsec, /*value=*/0, symbolSize); + // TODO: ld64 appears to preserve the original alignment as well as each + // subsection's offset from the last aligned address. We should consider + // emulating that behavior. + nextIsec->align = MinAlign(isec->align, sym.n_value); + subsecMap.push_back({sym.n_value - sectionAddr, nextIsec}); + subsecEntry = subsecMap.back(); + } + } } OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -161,7 +161,7 @@ Defined *SymbolTable::addSynthetic(StringRef name, InputSection *isec, uint32_t value, bool isPrivateExtern, bool includeInSymtab) { - Defined *s = addDefined(name, nullptr, isec, value, /*size*/ 0, + Defined *s = addDefined(name, nullptr, isec, value, /*size=*/0, /*isWeakDef=*/false, isPrivateExtern); s->includeInSymtab = includeInSymtab; return s; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -118,7 +118,10 @@ static bool classof(const Symbol *s) { return s->kind() == DefinedKind; } InputSection *isec; + // Contains the offset from the containing subsection. Note that this is + // different from nlist::n_value, which is the absolute address of the symbol. uint64_t value; + // size is only calculated for regular (non-bitcode) symbols. uint64_t size; bool overridesWeakDef : 1; diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -42,25 +42,41 @@ # CHECK-NEXT: [[#%x, STATIC:]] - 0[[#MORE_DATA_ID + 1]] 0000 STSYM _static_var # CHECK-NEXT: [[#%x, MAIN:]] - 0[[#TEXT_ID + 1]] 0000 FUN _main # CHECK-NEXT: 0000000000000006 - 00 0000 FUN -# CHECK-NEXT: [[#%x, FUN:]] - 0[[#MORE_TEXT_ID + 1]] 0000 FUN _fun +# CHECK-NEXT: [[#%x, BAR:]] - 0[[#TEXT_ID + 1]] 0000 FUN _bar +# CHECK-NEXT: 0000000000000000 - 00 0000 FUN +# CHECK-NEXT: [[#%x, BAR2:]] - 0[[#TEXT_ID + 1]] 0000 FUN _bar2 # CHECK-NEXT: 0000000000000001 - 00 0000 FUN +# CHECK-NEXT: [[#%x, BAZ:]] - 0[[#TEXT_ID + 1]] 0000 FUN _baz +# CHECK-NEXT: 0000000000000000 - 00 0000 FUN +# CHECK-NEXT: [[#%x, BAZ2:]] - 0[[#TEXT_ID + 1]] 0000 FUN _baz +# CHECK-NEXT: 0000000000000002 - 00 0000 FUN +# CHECK-NEXT: [[#%x, QUX:]] - 0[[#TEXT_ID + 1]] 0000 FUN _qux +# CHECK-NEXT: 0000000000000003 - 00 0000 FUN +# CHECK-NEXT: [[#%x, QUUX:]] - 0[[#TEXT_ID + 1]] 0000 FUN _quux +# CHECK-NEXT: 0000000000000004 - 00 0000 FUN # CHECK-NEXT: [[#%x, GLOB:]] - 0[[#DATA_ID + 1]] 0000 GSYM _global_var # CHECK-NEXT: [[#%x, ZERO:]] - 0[[#COMM_ID + 1]] 0000 GSYM _zero +# CHECK-NEXT: [[#%x, FUN:]] - 0[[#MORE_TEXT_ID + 1]] 0000 FUN _fun +# CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO # CHECK-NEXT: 0000000000000000 - 00 0000 SO /foo.cpp # CHECK-NEXT: 0000000000000020 - 03 0001 OSO [[FOO_PATH]] # CHECK-NEXT: [[#%x, FOO:]] - 0[[#TEXT_ID + 1]] 0000 FUN _foo # CHECK-NEXT: 0000000000000001 - 00 0000 FUN # CHECK-NEXT: 0000000000000000 - 01 0000 SO -# CHECK-NEXT: [[#STATIC]] s _static_var -# CHECK-NEXT: [[#MAIN]] T _main -# CHECK-NEXT: {{[0-9af]+}} A _abs -# CHECK-NEXT: [[#FUN]] S _fun -# CHECK-NEXT: [[#GLOB]] D _global_var -# CHECK-NEXT: [[#ZERO]] S _zero -# CHECK-NEXT: [[#FOO]] T _foo -# CHECK-NEXT: {{[0-9af]+}} T _no_debug -# CHECK-NEXT: {{0+}} A __mh_execute_header +# CHECK-DAG: [[#STATIC]] s _static_var +# CHECK-DAG: [[#MAIN]] T _main +# CHECK-DAG: {{[0-9af]+}} A _abs +# CHECK-DAG: [[#BAR]] T _bar +# CHECK-DAG: [[#BAR2]] T _bar2 +# CHECK-DAG: [[#BAZ]] T _baz +# CHECK-DAG: [[#BAZ2]] T _baz2 +# CHECK-DAG: [[#FUN]] S _fun +# CHECK-DAG: [[#GLOB]] D _global_var +# CHECK-DAG: [[#ZERO]] S _zero +# CHECK-DAG: [[#FOO]] T _foo +# CHECK-DAG: {{[0-9af]+}} T _no_debug +# CHECK-DAG: {{0+}} A __mh_execute_header # CHECK-EMPTY: ## Check that we don't attempt to emit rebase opcodes for the debug sections @@ -90,13 +106,30 @@ .zerofill __DATA,__common,_zero,4,2 .text -.globl _main +.globl _main, _bar, _bar2, _baz, _baz2, _qux, _quux +.alt_entry _baz +.alt_entry _qux + +_bar: +_bar2: + .space 1 + +_baz: +_baz2: + .space 2 + _main: Lfunc_begin0: callq _foo retq Lfunc_end0: +_qux: + .space 3 + +_quux: + .space 4 + .section __DWARF,__debug_str,regular,debug .asciz "test.cpp" ## string offset=0 .asciz "/tmp" ## string offset=9 diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -36,23 +36,23 @@ # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _external +# CHECK-NEXT: Name: _external_weak # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __data +# CHECK-NEXT: Section: __text (0x1) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) -# CHECK-NEXT: Flags [ (0x0) +# CHECK-NEXT: Flags [ (0x80) +# CHECK-NEXT: WeakDef (0x80) # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: _external_weak +# CHECK-NEXT: Name: _external # CHECK-NEXT: Extern # CHECK-NEXT: Type: Section (0xE) -# CHECK-NEXT: Section: __text (0x1) +# CHECK-NEXT: Section: __data # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) -# CHECK-NEXT: Flags [ (0x80) -# CHECK-NEXT: WeakDef (0x80) +# CHECK-NEXT: Flags [ (0x0) # CHECK-NEXT: ] # CHECK-NEXT: Value: 0x1{{[0-9a-f]*}} # CHECK-NEXT: }