Index: include/llvm/LTO/LTO.h =================================================================== --- include/llvm/LTO/LTO.h +++ include/llvm/LTO/LTO.h @@ -126,6 +126,7 @@ using irsymtab::Symbol::getCommonSize; using irsymtab::Symbol::getCommonAlignment; using irsymtab::Symbol::getCOFFWeakExternalFallback; + using irsymtab::Symbol::hasELFCIdentifierSectionName; using irsymtab::Symbol::isExecutable; }; Index: include/llvm/Object/IRSymtab.h =================================================================== --- include/llvm/Object/IRSymtab.h +++ include/llvm/Object/IRSymtab.h @@ -121,6 +121,13 @@ /// COFF-specific: the name of the symbol that a weak external resolves to /// if not defined. Str COFFWeakExternFallbackName; + + /// ELF-specific: ELF linkers generate __start_SECNAME and __stop_SECNAME + /// symbols when there is a value in a section SECNAME whose name is a + /// valid C identifier. Track symbols in such a section, so that we can + /// ensure that they aren't internalized and eliminated, which would + /// suppress the generation of the special __start_ and __stop_ symbols. + bool HasELFCIdentifierSectionName; }; struct Header { @@ -128,7 +135,7 @@ /// when the format changes, but it does not need to be incremented if a /// change to LLVM would cause it to create a different symbol table. Word Version; - enum { kCurrentVersion = 0 }; + enum { kCurrentVersion = 1 }; /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). /// Consumers should rebuild the symbol table from IR if the producer's @@ -165,6 +172,7 @@ // Copied from storage::Uncommon. uint32_t CommonSize, CommonAlign; StringRef COFFWeakExternFallbackName; + bool HasELFCIdentifierSectionName; /// Returns the mangled symbol name. 
StringRef getName() const { return Name; } @@ -215,6 +223,15 @@ assert(isWeak() && isIndirect()); return COFFWeakExternFallbackName; } + + /// ELF-specific: returns true if this symbol lives in a section whose name + /// is a valid C identifier. ELF linkers synthesize __start_SECNAME and + /// __stop_SECNAME symbols for such sections, so callers use this to keep + /// the symbol from being internalized and eliminated, which would suppress + /// the generation of those special symbols. + bool hasELFCIdentifierSectionName() const { + return HasELFCIdentifierSectionName; + } }; /// This class can be used to read a Symtab and Strtab produced by @@ -300,6 +317,7 @@ CommonSize = UncI->CommonSize; CommonAlign = UncI->CommonAlign; COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); + HasELFCIdentifierSectionName = UncI->HasELFCIdentifierSectionName; } } Index: lib/Object/IRSymtab.cpp =================================================================== --- lib/Object/IRSymtab.cpp +++ lib/Object/IRSymtab.cpp @@ -140,6 +140,18 @@ return Error::success(); } +static bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_'; +} + +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); } + +// Returns true if S is valid as a C language identifier. 
+static bool isValidCIdentifier(StringRef S) { + return !S.empty() && isAlpha(S[0]) && + std::all_of(S.begin() + 1, S.end(), isAlnum); +} + Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Msym) { @@ -240,6 +252,10 @@ } } + if (TT.isOSBinFormatELF()) + Uncommon().HasELFCIdentifierSectionName = + isValidCIdentifier(Base->getSection()); + return Error::success(); } Index: test/Object/X86/irsymtab.ll =================================================================== --- test/Object/X86/irsymtab.ll +++ test/Object/X86/irsymtab.ll @@ -9,13 +9,13 @@ ; BCA: blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00' +; BCA-NEXT: blob data = '\x01\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x02\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x04$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x0C$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00' ; BCA-NEXT: ; BCA-NEXT: blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll' ; BCA-NEXT: -; SYMTAB: version: 0 +; SYMTAB: version: 1 ; SYMTAB-NEXT: producer: producer ; SYMTAB-NEXT: target triple: 
x86_64-unknown-linux-gnu ; SYMTAB-NEXT: source filename: irsymtab.ll Index: test/tools/gold/X86/Inputs/global_with_section.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/Inputs/global_with_section.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @deadfunc2() { + ret void +} Index: test/tools/gold/X86/global_with_section.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/global_with_section.ll @@ -0,0 +1,59 @@ +; Test to ensure we don't internalize or treat as dead a global value +; with a section. Otherwise, ELF linker generation of __start_"sectionname" +; and __stop_"sectionname" symbols would not occur and we can end up +; with undefined references at link time. + +; RUN: opt %s -o %t.o +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB +; RUN: opt %p/Inputs/global_with_section.ll -o %t2.o +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -u foo \ +; RUN: --plugin-opt=save-temps \ +; RUN: -o %t3.o %t.o %t2.o +; Check results of internalization +; RUN: llvm-dis %t3.o.0.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-REGULARLTO + +; Do setup work for all below tests: generate bitcode +; RUN: opt -module-summary %s -o %t.o +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB +; RUN: opt -module-summary %p/Inputs/global_with_section.ll -o %t2.o + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -u foo \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=save-temps \ +; RUN: -o %t3.o %t.o %t2.o +; Check results of internalization +; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s +; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO + +; SYMTAB: deadfunc +; SYMTAB-NEXT: has C idenfier section name +; SYMTAB-NEXT: deadfunc2 
+; SYMTAB-NEXT: var_with_section +; SYMTAB-NEXT: has C idenfier section name + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; We should not internalize @var_with_section due to section +; CHECK-DAG: @var_with_section = global i32 0, section "some_section" +@var_with_section = global i32 0, section "some_section" + +; We should not internalize @deadfunc due to section +; CHECK-DAG: define void @deadfunc() section "some_other_section" +define void @deadfunc() section "some_other_section" { + call void @deadfunc2() + ret void +} + +; In RegularLTO mode, where we have combined all the IR, @deadfunc2 +; should still exist but would be internalized. +; CHECK2-REGULARLTO: define internal void @deadfunc2 +; In ThinLTO mode, we can't internalize it as it needs to be preserved +; (due to the access from @deadfunc which must be preserved), and +; can't be internalized since the reference is from a different module. +; CHECK2-THINLTO: define void @deadfunc2 +declare void @deadfunc2() + +@foo = global i32 0 Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -616,8 +616,13 @@ toString(ObjOrErr.takeError()).c_str()); unsigned SymNum = 0; + std::unique_ptr Input = std::move(ObjOrErr.get()); + auto InputFileSyms = Input->symbols(); + auto NumInputFileSyms = Input->symbols().size(); std::vector Resols(F.syms.size()); for (ld_plugin_symbol &Sym : F.syms) { + assert(SymNum < NumInputFileSyms); + const InputFile::Symbol &InpSym = InputFileSyms[SymNum]; SymbolResolution &R = Resols[SymNum++]; ld_plugin_symbol_resolution Resolution = @@ -639,6 +644,12 @@ case LDPR_PREVAILING_DEF_IRONLY: R.Prevailing = true; + // If the symbol has a C identifier section name, we need to mark + // it as visible to a regular object so that LTO will keep it around + // to ensure the linker generates special __start_SECNAME and + // __stop_SECNAME 
symbols which may be used elsewhere. + if (InpSym.hasELFCIdentifierSectionName()) + R.VisibleToRegularObj = true; break; case LDPR_PREVAILING_DEF: @@ -648,7 +659,12 @@ case LDPR_PREVAILING_DEF_IRONLY_EXP: R.Prevailing = true; - if (!Res.CanOmitFromDynSym) + if (!Res.CanOmitFromDynSym || + // If the symbol has a C identifier section name, we need to mark + // it as visible to a regular object so that LTO will keep it around + // to ensure the linker generates special __start_SECNAME and + // __stop_SECNAME symbols which may be used elsewhere. + InpSym.hasELFCIdentifierSectionName()) R.VisibleToRegularObj = true; break; } @@ -660,7 +676,7 @@ freeSymName(Sym); } - check(Lto.add(std::move(*ObjOrErr), Resols), + check(Lto.add(std::move(Input), Resols), std::string("Failed to link module ") + F.name); } Index: tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- tools/llvm-lto2/llvm-lto2.cpp +++ tools/llvm-lto2/llvm-lto2.cpp @@ -355,6 +355,9 @@ if (TT.isOSBinFormatCOFF() && Sym.isWeak() && Sym.isIndirect()) outs() << " fallback " << Sym.getCOFFWeakExternalFallback() << '\n'; + + if (TT.isOSBinFormatELF() && Sym.hasELFCIdentifierSectionName()) + outs() << " has C idenfier section name\n"; } outs() << '\n';