diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -436,6 +436,34 @@ return false; } +// Replaces common symbols with defined symbols residing in __common sections. +// This function must be called after all symbol names are resolved (i.e. after +// all InputFiles have been loaded.) As a result, later operations won't see +// any CommonSymbols. +static void replaceCommonSymbols() { + for (macho::Symbol *sym : symtab->getSymbols()) { + auto *common = dyn_cast(sym); + if (common == nullptr) + continue; + + auto *isec = make(); + isec->file = common->file; + isec->name = section_names::common; + isec->segname = segment_names::data; + isec->align = common->align; + // Casting to size_t will truncate large values on 32-bit architectures, + // but it's not really worth supporting the linking of 64-bit programs on + // 32-bit archs. + isec->data = {nullptr, static_cast(common->size)}; + isec->flags = S_ZEROFILL; + inputSections.push_back(isec); + + replaceSymbol(sym, sym->getName(), isec, /*value=*/0, + /*isWeakDef=*/false, + /*isExternal=*/true); + } +} + static inline char toLowerDash(char x) { if (x >= 'A' && x <= 'Z') return x - 'A' + 'a'; @@ -641,6 +669,8 @@ error("-sub_library " + searchName + " does not match a supplied dylib"); } + replaceCommonSymbols(); + StringRef orderFile = args.getLastArgValue(OPT_order_file); if (!orderFile.empty()) parseOrderFile(orderFile); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -243,10 +243,12 @@ for (size_t i = 0, n = nList.size(); i < n; ++i) { const structs::nlist_64 &sym = nList[i]; - // Undefined symbol - if (!sym.n_sect) { + if ((sym.n_type & N_TYPE) == N_UNDF) { StringRef name = strtab + sym.n_strx; - symbols[i] = symtab->addUndefined(name); + symbols[i] = sym.n_value == 0 + ? symtab->addUndefined(name) + : symtab->addCommon(name, this, sym.n_value, + 1 << GET_COMM_ALIGN(sym.n_desc)); continue; } diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -19,6 +19,7 @@ class ArchiveFile; class DylibFile; +class InputFile; class InputSection; class MachHeaderSection; class Symbol; @@ -36,6 +37,8 @@ Symbol *addUndefined(StringRef name); + Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align); + Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv); Symbol *addLazy(StringRef name, ArchiveFile *file, diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -74,6 +74,26 @@ return s; } +Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size, + uint32_t align) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(name); + + if (!wasInserted) { + if (auto *common = dyn_cast(s)) { + if (size < common->size) + return s; + } else if (!isa(s)) { + error("TODO: implement common symbol resolution with other symbol kinds"); + return s; + } + } + + replaceSymbol(s, name, file, size, align); + return s; +} + Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv) { Symbol *s; diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -14,6 +14,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Strings.h" #include "llvm/Object/Archive.h" +#include "llvm/Support/MathExtras.h" namespace lld { namespace macho { @@ -36,6 +37,7 @@ enum Kind { DefinedKind, UndefinedKind, + CommonKind, DylibKind, LazyKind, DSOHandleKind, @@ -117,6 +119,35 @@ static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } }; +// On Unix, it is traditionally allowed to write variable definitions without +// initialization expressions (such as "int foo;") to header files. These are +// called tentative definitions. +// +// Using tentative definitions is usually considered a bad practice; you should +// write only declarations (such as "extern int foo;") to header files. +// Nevertheless, the linker and the compiler have to do something to support +// bad code by allowing duplicate definitions for this particular case. +// +// The compiler creates common symbols when it sees tentative definitions. +// (You can suppress this behavior and let the compiler create a regular +// defined symbol by passing -fno-common.) When linking the final binary, if +// there are remaining common symbols after name resolution is complete, the +// linker converts them to regular defined symbols in a __common section. +class CommonSymbol : public Symbol { +public: + CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align) + : Symbol(CommonKind, name), file(file), size(size), + align(align != 1 ? align : llvm::PowerOf2Ceil(size)) { + // TODO: cap maximum alignment + } + + static bool classof(const Symbol *s) { return s->kind() == CommonKind; } + + InputFile *const file; + const uint64_t size; + const uint32_t align; +}; + class DylibSymbol : public Symbol { public: DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv) @@ -185,8 +216,10 @@ union SymbolUnion { alignas(Defined) char a[sizeof(Defined)]; alignas(Undefined) char b[sizeof(Undefined)]; - alignas(DylibSymbol) char c[sizeof(DylibSymbol)]; - alignas(LazySymbol) char d[sizeof(LazySymbol)]; + alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; + alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; + alignas(LazySymbol) char e[sizeof(LazySymbol)]; + alignas(DSOHandle) char f[sizeof(DSOHandle)]; }; template diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -26,6 +26,7 @@ namespace section_names { constexpr const char pageZero[] = "__pagezero"; +constexpr const char common[] = "__common"; constexpr const char header[] = "__mach_header"; constexpr const char binding[] = "__binding"; constexpr const char weakBinding[] = "__weak_binding"; diff --git a/lld/test/MachO/common-symbol-coalescing.s b/lld/test/MachO/common-symbol-coalescing.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/common-symbol-coalescing.s @@ -0,0 +1,83 @@ +# REQUIRES: x86 +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/same-size.s -o %t/same-size.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/smaller-size.s -o %t/smaller-size.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align.s -o %t/zero-align.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align-round-up.s -o %t/zero-align-round-up.o + +## Check that we pick the definition with the larger size, regardless of +## its alignment. +# RUN: lld -flavor darwinnew %t/test.o %t/smaller-size.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT +# RUN: lld -flavor darwinnew %t/smaller-size.o %t/test.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT + +## When the sizes are equal, we pick the symbol whose file occurs later in the +## command-line argument list. +# RUN: lld -flavor darwinnew %t/test.o %t/same-size.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT +# RUN: lld -flavor darwinnew %t/same-size.o %t/test.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT + +# RUN: lld -flavor darwinnew %t/test.o %t/zero-align.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT +# RUN: lld -flavor darwinnew %t/zero-align.o %t/test.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT + +# RUN: lld -flavor darwinnew %t/test.o %t/zero-align-round-up.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT +# RUN: lld -flavor darwinnew %t/zero-align-round-up.o %t/test.o -order_file %t/order -o %t/test +# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT + +# SMALLER-ALIGNMENT-LABEL: Sections: +# SMALLER-ALIGNMENT: __common {{[0-9a-f]+}} [[#%x, COMMON_START:]] BSS + +# SMALLER-ALIGNMENT-LABEL: SYMBOL TABLE: +# SMALLER-ALIGNMENT-DAG: [[#COMMON_START]] g O __DATA,__common _check_size +# SMALLER-ALIGNMENT-DAG: [[#COMMON_START + 2]] g O __DATA,__common _end_marker +# SMALLER-ALIGNMENT-DAG: [[#COMMON_START + 8]] g O __DATA,__common _check_alignment + +# LARGER-ALIGNMENT-LABEL: Sections: +# LARGER-ALIGNMENT: __common {{[0-9a-f]+}} [[#%x, COMMON_START:]] BSS + +# LARGER-ALIGNMENT-LABEL: SYMBOL TABLE: +# LARGER-ALIGNMENT-DAG: [[#COMMON_START]] g O __DATA,__common _check_size +# LARGER-ALIGNMENT-DAG: [[#COMMON_START + 2]] g O __DATA,__common _end_marker +# LARGER-ALIGNMENT-DAG: [[#COMMON_START + 16]] g O __DATA,__common _check_alignment + +#--- order +## Order is important as we determine the size of a given symbol via the +## address of the next symbol. +_check_size +_end_marker +_check_alignment + +#--- smaller-size.s +.comm _check_size, 1, 1 +.comm _check_alignment, 1, 4 + +#--- same-size.s +.comm _check_size, 2, 1 +.comm _check_alignment, 2, 4 + +#--- zero-align.s +.comm _check_size, 2, 1 +## If alignment is set to zero, use the size to determine the alignment. +.comm _check_alignment, 16, 0 + +#--- zero-align-round-up.s +.comm _check_size, 2, 1 +## If alignment is set to zero, use the size to determine the alignment. If the +## size is not a power of two, round it up. (In this case, 14 rounds to 16.) +.comm _check_alignment, 14, 0 + +#--- test.s +.comm _check_size, 2, 1 +.comm _end_marker, 1 +.comm _check_alignment, 2, 3 + +.globl _main +_main: + ret