Index: ELF/Driver.cpp =================================================================== --- ELF/Driver.cpp +++ ELF/Driver.cpp @@ -1120,6 +1120,7 @@ if (!Config->Relocatable) InputSections.push_back(createCommentSection()); + splitMergeSections(); // Do size optimizations: garbage collection, merging of SHF_MERGE sections // and identical code folding. markLive(); Index: ELF/InputSection.h =================================================================== --- ELF/InputSection.h +++ ELF/InputSection.h @@ -230,12 +230,7 @@ StringRef Name); static bool classof(const SectionBase *S) { return S->kind() == Merge; } void splitIntoPieces(); - - // Mark the piece at a given offset live. Used by GC. - void markLiveAt(uint64_t Offset) { - if (this->Flags & llvm::ELF::SHF_ALLOC) - LiveOffsets.insert(Offset); - } + void markLiveAt(uint64_t Offset); // Translate an offset in the input section to an offset // in the output section. Index: ELF/InputSection.cpp =================================================================== --- ELF/InputSection.cpp +++ ELF/InputSection.cpp @@ -932,10 +932,14 @@ splitStrings(Data, Entsize); else splitNonStrings(Data, Entsize); +} + +// Mark the piece at a given offset live. Used by GC. +void MergeInputSection::markLiveAt(uint64_t Offset) { + assert(!Pieces.empty()); - if (Config->GcSections && (this->Flags & SHF_ALLOC)) - for (uint64_t Off : LiveOffsets) - this->getSectionPiece(Off)->Live = true; + if (this->Flags & llvm::ELF::SHF_ALLOC) + this->getSectionPiece(Offset)->Live = true; } // Do binary search to get a section piece at a given input offset. Index: ELF/LinkerScript.cpp =================================================================== --- ELF/LinkerScript.cpp +++ ELF/LinkerScript.cpp @@ -133,6 +133,7 @@ /*CanOmitFromDynSym*/ false, /*File*/ nullptr); Sym->Binding = STB_GLOBAL; + Sym->Live = true; ExprValue Value = Cmd->Expression(); SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; Index: ELF/MarkLive.cpp =================================================================== --- ELF/MarkLive.cpp +++ ELF/MarkLive.cpp @@ -59,11 +59,23 @@ // identifiers, so we just store a std::vector instead of a multimap. static DenseMap> CNamedSections; +template static void markSymbolLive(Symbol *S) { + S->Live = true; + // For non-weak DSO symbols, mark the file to be added to a DT_NEEDED entry. + if (S->isWeak()) + return; + if (auto *F = dyn_cast_or_null>(S->File)) + F->IsUsed = true; +} + template static void resolveReloc(InputSectionBase &Sec, RelT &Rel, std::function Fn) { Symbol &B = Sec.getFile()->getRelocTargetSym(Rel); + // All symbols referenced in live sections are live. + markSymbolLive(&B); + if (auto *D = dyn_cast(&B)) { if (!D->Section) return; @@ -259,10 +271,15 @@ // input sections. This function make some or all of them on // so that they are emitted to the output file. template void elf::markLive() { - // If -gc-sections is missing, no sections are removed. + // If -gc-sections is missing, no sections and no symbols are removed. if (!Config->GcSections) { for (InputSectionBase *Sec : InputSections) Sec->Live = true; + for (Symbol *S : Symtab->getSymbols()) + markSymbolLive(S); + for (InputFile *F : ObjectFiles) + for (Symbol *S : cast>(F)->getLocalSymbols()) + markSymbolLive(S); return; } @@ -290,6 +307,36 @@ // Follow the graph to mark all live sections. doGcSections(); + // Set Live flag for all symbols which survived. + // We already marked some of them in resolveReloc(), but if a symbol just + // points to a section and is not used in any relocation we haven't seen it. + auto ProcessSymbol = [&](Symbol *S) { + auto *D = dyn_cast(S); + if (!D) + return; + SectionBase *Sec = D->Section; + // Always include absolute symbols. + if (!Sec) { + markSymbolLive(S); + return; + } + if (auto *IS = dyn_cast(Sec)) + Sec = IS->Repl; + // Exclude symbols pointing to garbage-collected sections. + if (!Sec->Live) + return; + if (auto *S = dyn_cast(Sec)) + if (!S->getSectionPiece(D->Value)->Live) + return; + markSymbolLive(S); + }; + + for (Symbol *S : Symtab->getSymbols()) + ProcessSymbol(S); + for (InputFile *F : ObjectFiles) + for (Symbol *S : cast>(F)->getLocalSymbols()) + ProcessSymbol(S); + // Report garbage-collected sections. if (Config->PrintGcSections) for (InputSectionBase *Sec : InputSections) Index: ELF/SymbolTable.cpp =================================================================== --- ELF/SymbolTable.cpp +++ ELF/SymbolTable.cpp @@ -504,8 +504,6 @@ S->getVisibility() == STV_DEFAULT)) { replaceSymbol(S, File, Name, Sym.st_other, Sym.getType(), Sym.st_value, Sym.st_size, Alignment, Verdef); - if (!S->isWeak()) - File->IsUsed = true; } } Index: ELF/Symbols.h =================================================================== --- ELF/Symbols.h +++ ELF/Symbols.h @@ -133,8 +133,8 @@ Symbol(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type) : SymbolKind(K), IsLocal(IsLocal), NeedsPltAddr(false), IsInGlobalMipsGot(false), Is32BitMipsGot(false), IsInIplt(false), - IsInIgot(false), IsPreemptible(false), Type(Type), StOther(StOther), - Name(Name) {} + IsInIgot(false), IsPreemptible(false), Live(false), Type(Type), + StOther(StOther), Name(Name) {} const unsigned SymbolKind : 8; @@ -159,6 +159,10 @@ unsigned IsPreemptible : 1; + // True if this symbol survived after GC. + // If GC is disabled, all symbols are set to live. + unsigned Live : 1; + // The following fields have the same meaning as the ELF symbol attributes. uint8_t Type; // symbol type uint8_t StOther; // st_other field value @@ -362,6 +366,7 @@ S->CanInline = Sym.CanInline; S->Traced = Sym.Traced; S->InVersionScript = Sym.InVersionScript; + S->Live = Sym.Live; // Print out a log message if --trace-symbol was specified. // This is for debugging. Index: ELF/SyntheticSections.h =================================================================== --- ELF/SyntheticSections.h +++ ELF/SyntheticSections.h @@ -824,6 +824,7 @@ InputSection *createInterpSection(); template MergeInputSection *createCommentSection(); void decompressSections(); +void splitMergeSections(); void mergeSections(); Symbol *addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value, Index: ELF/SyntheticSections.cpp =================================================================== --- ELF/SyntheticSections.cpp +++ ELF/SyntheticSections.cpp @@ -2474,6 +2474,17 @@ }); } +// This function splits all input merge sections so that separate +// pieces can be marked alive in GC. +void elf::splitMergeSections() { + parallelForEach(InputSections, [](InputSectionBase *Sec) { + if (auto *S = dyn_cast(Sec)) { + S->maybeUncompress(); + S->splitIntoPieces(); + } + }); +} + // This function scans over the inputsections to create mergeable // synthetic sections. // @@ -2482,14 +2493,6 @@ // that it replaces. It then finalizes each synthetic section in order // to compute an output offset for each piece of each input section. void elf::mergeSections() { - // splitIntoPieces needs to be called on each MergeInputSection - // before calling finalizeContents(). Do that first. - parallelForEach(InputSections, [](InputSectionBase *Sec) { - if (Sec->Live) - if (auto *S = dyn_cast(Sec)) - S->splitIntoPieces(); - }); - std::vector MergeSections; for (InputSectionBase *&S : InputSections) { MergeInputSection *MS = dyn_cast(S); Index: ELF/Writer.cpp =================================================================== --- ELF/Writer.cpp +++ ELF/Writer.cpp @@ -425,24 +425,7 @@ static bool includeInSymtab(const Symbol &B) { if (!B.isLocal() && !B.IsUsedInRegularObj) return false; - - if (auto *D = dyn_cast(&B)) { - // Always include absolute symbols. - SectionBase *Sec = D->Section; - if (!Sec) - return true; - if (auto *IS = dyn_cast(Sec)) { - Sec = IS->Repl; - IS = cast(Sec); - // Exclude symbols pointing to garbage-collected sections. - if (!IS->Live) - return false; - } - if (auto *S = dyn_cast(Sec)) - if (!S->getSectionPiece(D->Value)->Live) - return false; - } - return true; + return B.Live; } // Local symbols are not in the linker's symbol table. This function scans @@ -743,6 +726,7 @@ Symbol *Sym = Symtab->addRegular(Name, StOther, STT_NOTYPE, Val, /*Size=*/0, Binding, Sec, /*File=*/nullptr); + Sym->Live = true; return cast(Sym); } @@ -772,20 +756,25 @@ // See "Global Data Symbols" in Chapter 6 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf ElfSym::MipsGp = Symtab->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); + ElfSym::MipsGp->Live = true; // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between // start of function and 'gp' pointer into GOT. - if (Symtab->find("_gp_disp")) + if (Symtab->find("_gp_disp")) { ElfSym::MipsGpDisp = Symtab->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL); + ElfSym::MipsGpDisp->Live = true; + } // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' // pointer. This symbol is used in the code generated by .cpload pseudo-op // in case of using -mno-shared option. // https://sourceware.org/ml/binutils/2004-12/msg00094.html - if (Symtab->find("__gnu_local_gp")) + if (Symtab->find("__gnu_local_gp")) { ElfSym::MipsLocalGp = Symtab->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL); + ElfSym::MipsLocalGp->Live = true; + } } // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to @@ -1228,10 +1217,12 @@ // It should be okay as no one seems to care about the type. // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html - if (InX::DynSymTab) - Symtab->addRegular("_DYNAMIC", STV_HIDDEN, STT_NOTYPE, 0 /*Value*/, - /*Size=*/0, STB_WEAK, InX::Dynamic, - /*File=*/nullptr); + if (InX::DynSymTab) { + Symbol *S = Symtab->addRegular("_DYNAMIC", STV_HIDDEN, STT_NOTYPE, + 0 /*Value*/, /*Size=*/0, STB_WEAK, + InX::Dynamic, /*File=*/nullptr); + S->Live = true; + } // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); Index: test/ELF/Inputs/shared3.s =================================================================== --- test/ELF/Inputs/shared3.s +++ test/ELF/Inputs/shared3.s @@ -1,3 +1,3 @@ .global baz -.type barz, @function +.type baz, @function baz: Index: test/ELF/gc-collect-undefined.s =================================================================== --- /dev/null +++ test/ELF/gc-collect-undefined.s @@ -0,0 +1,18 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t +# RUN: ld.lld %t -o %tout --gc-sections -shared +# RUN: llvm-nm -D %tout | FileCheck %s + +# CHECK: bar +# CHECK-NOT: qux + + .global foo,bar,qux + .local baz + + .section .data.foo,"aw",%progbits +foo: + .dc.a bar + + .section .bata.baz,"aw",%progbits +baz: + .dc.a qux Index: test/ELF/gc-sections-shared.s =================================================================== --- test/ELF/gc-sections-shared.s +++ test/ELF/gc-sections-shared.s @@ -1,12 +1,15 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/shared.s -o %t2.o # RUN: ld.lld -shared %t2.o -o %t2.so +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/shared3.s -o %t3.o +# RUN: ld.lld -shared %t3.o -o %t3.so # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t.o -# RUN: ld.lld --gc-sections --export-dynamic-symbol foo -o %t %t.o --as-needed %t2.so +# RUN: ld.lld --gc-sections --export-dynamic-symbol foo -o %t %t.o --as-needed %t2.so %t3.so # RUN: llvm-readobj --dynamic-table --dyn-symbols %t | FileCheck %s # This test the property that we have a needed line for every undefined. -# It would also be OK to drop bar2 and the need for the .so +# '%t2.so' is dropped because 'bar2' is eliminated, whereas +# '%t3.so' is preserved because 'baz' is used. # CHECK: DynamicSymbols [ # CHECK-NEXT: Symbol { @@ -19,16 +22,16 @@ # CHECK-NEXT: Section: Undefined (0x0) # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: bar2 +# CHECK-NEXT: Name: bar # CHECK-NEXT: Value: # CHECK-NEXT: Size: # CHECK-NEXT: Binding: Global # CHECK-NEXT: Type: # CHECK-NEXT: Other: -# CHECK-NEXT: Section: Undefined +# CHECK-NEXT: Section: .text # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: bar +# CHECK-NEXT: Name: foo # CHECK-NEXT: Value: # CHECK-NEXT: Size: # CHECK-NEXT: Binding: Global @@ -37,22 +40,24 @@ # CHECK-NEXT: Section: .text # CHECK-NEXT: } # CHECK-NEXT: Symbol { -# CHECK-NEXT: Name: foo +# CHECK-NEXT: Name: baz # CHECK-NEXT: Value: # CHECK-NEXT: Size: # CHECK-NEXT: Binding: Global # CHECK-NEXT: Type: # CHECK-NEXT: Other: -# CHECK-NEXT: Section: .text +# CHECK-NEXT: Section: Undefined # CHECK-NEXT: } # CHECK-NEXT: ] -# CHECK: NEEDED Shared library: [{{.*}}.so] +# CHECK-NOT: NEEDED Shared library: [{{.*}}2.so] +# CHECK: NEEDED Shared library: [{{.*}}3.so] .section .text.foo, "ax" .globl foo foo: call bar +call baz .section .text.bar, "ax" .globl bar Index: test/ELF/gnu-hash-table.s =================================================================== --- test/ELF/gnu-hash-table.s +++ test/ELF/gnu-hash-table.s @@ -1,6 +1,6 @@ # REQUIRES: x86,ppc -# RUN: echo ".globl foo" > %te.s +# RUN: echo ".globl foo; .data; .dc.a foo" > %te.s # RUN: llvm-mc -filetype=obj -triple=i386-pc-linux %te.s -o %te-i386.o # RUN: llvm-mc -filetype=obj -triple=i386-pc-linux %s -o %t-i386.o # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t-x86_64.o @@ -244,3 +244,5 @@ bar: .weak zed .global xyz +.data + .dc.a baz Index: test/ELF/lto/shlib-undefined.ll =================================================================== --- test/ELF/lto/shlib-undefined.ll +++ test/ELF/lto/shlib-undefined.ll @@ -1,6 +1,6 @@ ; REQUIRES: x86 ; RUN: llvm-as %s -o %t.o -; RUN: echo .global __progname > %t2.s +; RUN: echo ".global __progname; .data; .dc.a __progname" > %t2.s ; RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t2.s -o %t2.o ; RUN: ld.lld -shared %t2.o -o %t2.so ; RUN: ld.lld -o %t %t.o %t2.so Index: test/ELF/progname.s =================================================================== --- test/ELF/progname.s +++ test/ELF/progname.s @@ -1,6 +1,6 @@ // REQUIRES: x86 // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: echo .global __progname > %t2.s +// RUN: echo ".global __progname; .data; .dc.a __progname" > %t2.s // RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t2.s -o %t2.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld -o %t %t.o %t2.so Index: test/ELF/undef-version-script.s =================================================================== --- test/ELF/undef-version-script.s +++ test/ELF/undef-version-script.s @@ -3,9 +3,6 @@ # RUN: ld.lld --version-script %t.script -shared %t.o -o %t.so # RUN: llvm-readobj -dyn-symbols %t.so | FileCheck %s -# This does not match gold's behavior because gold does not create undefined -# symbols in dynsym without an appropriate (e.g. PLT) relocation in the input. - # CHECK: DynamicSymbols [ # CHECK-NEXT: Symbol { # CHECK-NEXT: Name: @ @@ -38,3 +35,6 @@ .global foo .weak bar +.data + .dc.a foo + .dc.a bar Index: test/ELF/weak-undef.s =================================================================== --- test/ELF/weak-undef.s +++ test/ELF/weak-undef.s @@ -28,3 +28,6 @@ .globl _start _start: + +.data + .dc.a foo