diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -283,7 +283,7 @@ } else if (config->forceLoadObjC) { for (const object::Archive::Symbol &sym : file->symbols()) if (sym.getName().startswith(objc::klass)) - symtab->addUndefined(sym.getName()); + symtab->addUndefined(sym.getName(), /*isWeakRef=*/false); // TODO: no need to look for ObjC sections for a given archive member if // we already found that it contains an ObjC symbol. We should also @@ -742,7 +742,8 @@ symtab = make(); target = createTargetInfo(args); - config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main")); + config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main"), + /*isWeakRef=*/false); config->outputFile = args.getLastArgValue(OPT_o, "a.out"); config->installName = args.getLastArgValue(OPT_install_name, config->outputFile); diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -283,7 +283,7 @@ switch (type) { case N_UNDF: return sym.n_value == 0 - ? symtab->addUndefined(name) + ? symtab->addUndefined(name, sym.n_desc & N_WEAK_REF) : symtab->addCommon(name, this, sym.n_value, 1 << GET_COMM_ALIGN(sym.n_desc)); case N_ABS: diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h --- a/lld/MachO/SymbolTable.h +++ b/lld/MachO/SymbolTable.h @@ -35,7 +35,7 @@ Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value, bool isWeakDef); - Symbol *addUndefined(StringRef name); + Symbol *addUndefined(StringRef name, bool isWeakRef); Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align); diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -62,15 +62,21 @@ return s; } -Symbol *SymbolTable::addUndefined(StringRef name) { +Symbol *SymbolTable::addUndefined(StringRef name, bool isWeakRef) { Symbol *s; bool wasInserted; std::tie(s, wasInserted) = insert(name); + auto refState = isWeakRef ? RefState::Weak : RefState::Strong; + if (wasInserted) - replaceSymbol(s, name); - else if (LazySymbol *lazy = dyn_cast(s)) + replaceSymbol(s, name, refState); + else if (auto *lazy = dyn_cast(s)) lazy->fetchArchiveMember(); + else if (auto *dynsym = dyn_cast(s)) + dynsym->refState = std::max(dynsym->refState, refState); + else if (auto *undefined = dyn_cast(s)) + undefined->refState = std::max(undefined->refState, refState); return s; } @@ -101,14 +107,21 @@ bool wasInserted; std::tie(s, wasInserted) = insert(name); - if (!wasInserted && isWeakDef) - if (auto *defined = dyn_cast(s)) - if (!defined->isWeakDef()) + auto refState = RefState::Unreferenced; + if (!wasInserted) { + if (auto *defined = dyn_cast(s)) { + if (isWeakDef && !defined->isWeakDef()) defined->overridesWeakDef = true; + } else if (auto *undefined = dyn_cast(s)) { + refState = undefined->refState; + } else if (auto *dysym = dyn_cast(s)) { + refState = dysym->refState; + } + } if (wasInserted || isa(s) || (isa(s) && !isWeakDef && s->isWeakDef())) - replaceSymbol(s, file, name, isWeakDef, isTlv); + replaceSymbol(s, file, name, isWeakDef, refState, isTlv); return s; } diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -55,7 +55,12 @@ llvm_unreachable("attempt to get an offset from a non-defined symbol"); } - virtual bool isWeakDef() const { llvm_unreachable("cannot be weak"); } + virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } + + // Only undefined or dylib symbols can be weak references. A weak reference + // need not be satisfied at runtime, e.g. due to the symbol not being + // available on a given target platform. + virtual bool isWeakRef() const { llvm_unreachable("cannot be a weak ref"); } virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } @@ -111,11 +116,21 @@ const bool external : 1; }; +// Indicates whether & how a dylib symbol is referenced. +enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 }; + class Undefined : public Symbol { public: - Undefined(StringRefZ name) : Symbol(UndefinedKind, name) {} + Undefined(StringRefZ name, RefState refState) + : Symbol(UndefinedKind, name), refState(refState) { + assert(refState != refState::Unreferenced); + } + + bool isWeakRef() const override { return refState == RefState::Weak; } static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; } + + RefState refState : 2; }; // On Unix, it is traditionally allowed to write variable definitions without @@ -149,10 +164,14 @@ class DylibSymbol : public Symbol { public: - DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv) - : Symbol(DylibKind, name), file(file), weakDef(isWeakDef), tlv(isTlv) {} + DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, + RefState refState, bool isTlv) + : Symbol(DylibKind, name), file(file), refState(refState), + weakDef(isWeakDef), tlv(isTlv) {} bool isWeakDef() const override { return weakDef; } + bool isWeakRef() const override { return refState == RefState::Weak; } + bool isReferenced() const { return refState != RefState::Unreferenced; } bool isTlv() const override { return tlv; } bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; } @@ -162,9 +181,11 @@ uint32_t stubsHelperIndex = UINT32_MAX; uint32_t lazyBindOffset = UINT32_MAX; + RefState refState : 2; + private: - const bool weakDef; - const bool tlv; + const bool weakDef : 1; + const bool tlv : 1; }; class LazySymbol : public Symbol { diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -227,7 +227,8 @@ // lastBinding. static void encodeBinding(const Symbol *sym, const OutputSection *osec, uint64_t outSecOff, int64_t addend, - Binding &lastBinding, raw_svector_ostream &os) { + bool isWeakBinding, Binding &lastBinding, + raw_svector_ostream &os) { using namespace llvm::MachO; OutputSegment *seg = osec->parent; uint64_t offset = osec->getSegmentOffset() + outSecOff; @@ -249,8 +250,11 @@ lastBinding.addend = addend; } - os << static_cast(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym->getName() << '\0' + uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (!isWeakBinding && sym->isWeakRef()) + flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT; + + os << flags << sym->getName() << '\0' << static_cast(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER) << static_cast(BIND_OPCODE_DO_BIND); // DO_BIND causes dyld to both perform the binding and increment the offset @@ -308,10 +312,11 @@ encodeDylibOrdinal(b.dysym, lastBinding, os); if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.dysym, isec->parent, isec->outSecOff + b.target.offset, - b.addend, lastBinding, os); + b.addend, /*isWeakBinding=*/false, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.dysym, osec, b.target.offset, b.addend, lastBinding, os); + encodeBinding(b.dysym, osec, b.target.offset, b.addend, + /*isWeakBinding=*/false, lastBinding, os); } } if (!bindings.empty()) @@ -341,10 +346,11 @@ for (const WeakBindingEntry &b : bindings) { if (auto *isec = b.target.section.dyn_cast()) { encodeBinding(b.symbol, isec->parent, isec->outSecOff + b.target.offset, - b.addend, lastBinding, os); + b.addend, /*isWeakBinding=*/true, lastBinding, os); } else { auto *osec = b.target.section.get(); - encodeBinding(b.symbol, osec, b.target.offset, b.addend, lastBinding, os); + encodeBinding(b.symbol, osec, b.target.offset, b.addend, + /*isWeakBinding=*/true, lastBinding, os); } } if (!bindings.empty() || !definitions.empty()) @@ -525,8 +531,11 @@ encodeULEB128(sym.file->ordinal, os); } - os << static_cast(MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM) - << sym.getName() << '\0' + uint8_t flags = MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM; + if (sym.isWeakRef()) + flags |= MachO::BIND_SYMBOL_FLAGS_WEAK_IMPORT; + + os << flags << sym.getName() << '\0' << static_cast(MachO::BIND_OPCODE_DO_BIND) << static_cast(MachO::BIND_OPCODE_DONE); return opstreamOffset; @@ -705,8 +714,9 @@ if (auto *defined = dyn_cast(sym)) { assert(defined->isExternal()); externalSymbols.push_back({sym, strx}); - } else if (sym->isInGot() || sym->isInStubs()) { - undefinedSymbols.push_back({sym, strx}); + } else if (auto *dysym = dyn_cast(sym)) { + if (dysym->isReferenced()) + undefinedSymbols.push_back({sym, strx}); } } @@ -756,6 +766,8 @@ } else if (auto *dysym = dyn_cast(entry.sym)) { uint16_t n_desc = nList->n_desc; MachO::SET_LIBRARY_ORDINAL(n_desc, dysym->file->ordinal); + nList->n_type = MachO::N_EXT; + n_desc |= dysym->isWeakRef() ? MachO::N_WEAK_REF : 0; nList->n_desc = n_desc; } ++nList; diff --git a/lld/test/MachO/symtab.s b/lld/test/MachO/symtab.s --- a/lld/test/MachO/symtab.s +++ b/lld/test/MachO/symtab.s @@ -59,6 +59,7 @@ # CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: dyld_stub_binder (15) +# CHECK-NEXT: Extern # CHECK-NEXT: Type: Undef (0x0) # CHECK-NEXT: Section: (0x0) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) @@ -68,6 +69,7 @@ # CHECK-NEXT: } # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: _dynamic (80) +# CHECK-NEXT: Extern # CHECK-NEXT: Type: Undef (0x0) # CHECK-NEXT: Section: (0x0) # CHECK-NEXT: RefType: UndefinedNonLazy (0x0) diff --git a/lld/test/MachO/weak-reference.s b/lld/test/MachO/weak-reference.s new file mode 100644 --- /dev/null +++ b/lld/test/MachO/weak-reference.s @@ -0,0 +1,144 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/libfoo.s -o %t/libfoo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/strongref.s -o %t/strongref.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid.s -o %t/invalid.o +# RUN: %lld -lSystem -dylib %t/libfoo.o -o %t/libfoo.dylib + +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib -o %t/test +# RUN: llvm-objdump --macho --syms --bind %t/test | FileCheck %s --check-prefixes=SYMS,BIND +## llvm-objdump doesn't print out all the flags info for lazy & weak bindings, +## so we use obj2yaml instead to test them. +# RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML + +# RUN: %lld -lSystem %t/libfoo.dylib %t/test.o -o %t/test +# RUN: llvm-objdump --macho --syms --bind %t/test | FileCheck %s --check-prefixes=SYMS,BIND +# RUN: obj2yaml %t/test | FileCheck %s --check-prefix=YAML + +# SYMS: SYMBOL TABLE: +# SYMS-DAG: 0000000000000000 w *UND* _foo +# SYMS-DAG: 0000000000000000 w *UND* _foo_fn +# SYMS-DAG: 0000000000000000 w *UND* _foo_tlv +# SYMS-DAG: 0000000000000000 w *UND* _weak_foo +# SYMS-DAG: 0000000000000000 w *UND* _weak_foo_fn + +# BIND: Bind table: +# BIND-NEXT: segment section address type addend dylib symbol +# BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo (weak_import) +# BIND-DAG: __DATA_CONST __got 0x{{[0-9a-f]+}} pointer 0 libfoo _foo (weak_import) +# BIND-DAG: __DATA __thread_ptrs 0x{{[0-9a-f]+}} pointer 0 libfoo _foo_tlv (weak_import) +# BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo (weak_import) +# BIND-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo_fn (weak_import) + +# YAML-LABEL: WeakBindOpcodes: +# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# YAML-NEXT: Imm: 0 +# YAML-NEXT: Symbol: _weak_foo +# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# YAML-NEXT: Imm: 0 +# YAML-NEXT: Symbol: _weak_foo_fn +# YAML-LABEL: LazyBindOpcodes: +# YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# YAML-NEXT: Imm: 1 +# YAML-NEXT: Symbol: _foo_fn + +## Check that if both strong & weak references are present in inputs, the weak +## reference takes priority. NOTE: ld64 actually emits a strong reference if +## the reference is to a function symbol or a TLV. I'm not sure if there's a +## good reason for that, so I'm deviating here for a simpler implementation. +# RUN: %lld -lSystem %t/test.o %t/strongref.o %t/libfoo.dylib -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/strongref.o %t/test.o %t/libfoo.dylib -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/libfoo.dylib %t/strongref.o %t/test.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/libfoo.dylib %t/test.o %t/strongref.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/test.o %t/libfoo.dylib %t/strongref.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML +# RUN: %lld -lSystem %t/strongref.o %t/libfoo.dylib %t/test.o -o %t/with-strong +# RUN: llvm-objdump --macho --bind %t/with-strong | FileCheck %s --check-prefix=STRONG-BIND +# RUN: obj2yaml %t/with-strong | FileCheck %s --check-prefix=STRONG-YAML + +# STRONG-BIND: Bind table: +# STRONG-BIND-NEXT: segment section address type addend dylib symbol +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}} +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}} +# STRONG-BIND-DAG: __DATA_CONST __got 0x{{[0-9a-f]+}} pointer 0 libfoo _foo{{$}} +# STRONG-BIND-DAG: __DATA __thread_ptrs 0x{{[0-9a-f]+}} pointer 0 libfoo _foo_tlv{{$}} +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo{{$}} +# STRONG-BIND-DAG: __DATA __data 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo{{$}} +# STRONG-BIND-DAG: __DATA __la_symbol_ptr 0x{{[0-9a-f]+}} pointer 0 libfoo _weak_foo_fn{{$}} + +# STRONG-YAML-LABEL: WeakBindOpcodes: +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _weak_foo +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _weak_foo +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _weak_foo_fn +# STRONG-YAML-LABEL: LazyBindOpcodes: +# STRONG-YAML: - Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# STRONG-YAML-NEXT: Imm: 0 +# STRONG-YAML-NEXT: Symbol: _foo_fn + +## Weak references must still be satisfied at link time. +# RUN: not %lld -lSystem %t/invalid.o -o /dev/null 2>&1 | FileCheck %s \ +# RUN: --check-prefix=INVALID -DDIR=%t +# INVALID: error: undefined symbol _missing, referenced from [[DIR]]/invalid.o + +#--- libfoo.s +.globl _foo, _foo_fn, _weak_foo, _weak_foo_fn +.weak_definition _weak_foo, _weak_foo_fn +_foo: +_foo_fn: +_weak_foo: +_weak_foo_fn: + +.section __DATA,__thread_vars,thread_local_variables +.globl _foo_tlv +_foo_tlv: + +#--- test.s +.globl _main +.weak_reference _foo_fn, _foo, _weak_foo, _weak_foo_fn, _foo_tlv + +_main: + mov _foo@GOTPCREL(%rip), %rax + mov _foo_tlv@TLVP(%rip), %rax + callq _foo_fn + callq _weak_foo_fn + ret + +.data + .quad _foo + .quad _weak_foo + +#--- strongref.s +.globl _strongref +_strongref: + mov _foo@GOTPCREL(%rip), %rax + mov _foo_tlv@TLVP(%rip), %rax + callq _foo_fn + callq _weak_foo_fn + ret + +.data + .quad _foo + .quad _weak_foo + +#--- invalid.s +.globl _main +.weak_reference _missing +_main: + callq _missing + ret