Index: include/llvm/LTO/LTO.h =================================================================== --- include/llvm/LTO/LTO.h +++ include/llvm/LTO/LTO.h @@ -126,6 +126,7 @@ using irsymtab::Symbol::getCommonSize; using irsymtab::Symbol::getCommonAlignment; using irsymtab::Symbol::getCOFFWeakExternalFallback; + using irsymtab::Symbol::getELFCIdentifierSectionName; using irsymtab::Symbol::isExecutable; }; Index: include/llvm/Object/IRSymtab.h =================================================================== --- include/llvm/Object/IRSymtab.h +++ include/llvm/Object/IRSymtab.h @@ -121,6 +121,13 @@ /// COFF-specific: the name of the symbol that a weak external resolves to /// if not defined. Str COFFWeakExternFallbackName; + + /// ELF-specific: ELF linkers generate __start_<secname> and __stop_<secname> + /// symbols when there is a value in a section where the name is a + /// valid C identifier. Track symbols in such a section, so that we can + /// ensure that they aren't internalized and eliminated, which would + /// suppress the generation of the special __start_ and __stop_ symbols. + Str ELFCIdentifierSectionName; }; struct Header { @@ -128,7 +135,7 @@ /// when the format changes, but it does not need to be incremented if a /// change to LLVM would cause it to create a different symbol table. Word Version; - enum { kCurrentVersion = 0 }; + enum { kCurrentVersion = 1 }; /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). /// Consumers should rebuild the symbol table from IR if the producer's @@ -165,6 +172,7 @@ // Copied from storage::Uncommon. uint32_t CommonSize, CommonAlign; StringRef COFFWeakExternFallbackName; + StringRef ELFCIdentifierSectionName; /// Returns the mangled symbol name. 
StringRef getName() const { return Name; } @@ -215,6 +223,15 @@ assert(isWeak() && isIndirect()); return COFFWeakExternFallbackName; } + + /// ELF-specific: ELF linkers generate __start_<secname> and __stop_<secname> + /// symbols when there is a value in a section where the name is a + /// valid C identifier. Track symbols in such a section, so that we can + /// ensure that they aren't internalized and eliminated, which would + /// suppress the generation of the special __start_ and __stop_ symbols. + StringRef getELFCIdentifierSectionName() const { + return ELFCIdentifierSectionName; + } }; /// This class can be used to read a Symtab and Strtab produced by @@ -300,7 +317,10 @@ CommonSize = UncI->CommonSize; CommonAlign = UncI->CommonAlign; COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); - } + ELFCIdentifierSectionName = R->str(UncI->ELFCIdentifierSectionName); + } else + // Reset this field so it can be queried unconditionally for all symbols. + ELFCIdentifierSectionName = ""; } public: Index: lib/Object/IRSymtab.cpp =================================================================== --- lib/Object/IRSymtab.cpp +++ lib/Object/IRSymtab.cpp @@ -140,6 +140,18 @@ return Error::success(); } +static bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_'; +} + +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); } + +// Returns true if S is valid as a C language identifier. 
+static bool isValidCIdentifier(StringRef S) { + return !S.empty() && isAlpha(S[0]) && + std::all_of(S.begin() + 1, S.end(), isAlnum); +} + Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Msym) { @@ -156,6 +168,7 @@ Unc = &Uncommons.back(); *Unc = {}; setStr(Unc->COFFWeakExternFallbackName, ""); + setStr(Unc->ELFCIdentifierSectionName, ""); return *Unc; }; @@ -240,6 +253,10 @@ } } + if (TT.isOSBinFormatELF() && isValidCIdentifier(Base->getSection())) + setStr(Uncommon().ELFCIdentifierSectionName, + Saver.save(Base->getSection())); + return Error::success(); } Index: test/Object/X86/irsymtab.ll =================================================================== --- test/Object/X86/irsymtab.ll +++ test/Object/X86/irsymtab.ll @@ -9,13 +9,13 @@ ; BCA: blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00' +; BCA-NEXT: blob data = '\x01\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00' ; BCA-NEXT: ; BCA-NEXT: blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll' ; BCA-NEXT: -; SYMTAB: version: 0 +; SYMTAB: version: 1 ; 
SYMTAB-NEXT: producer: producer ; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu ; SYMTAB-NEXT: source filename: irsymtab.ll Index: test/tools/gold/X86/Inputs/global_with_section.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/Inputs/global_with_section.ll @@ -0,0 +1,10 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @deadfunc2_called_from_section() { + ret void +} + +define void @deadfunc2_called_from_nonC_section() { + ret void +} Index: test/tools/gold/X86/global_with_section.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/global_with_section.ll @@ -0,0 +1,79 @@ +; Test to ensure we don't internalize or treat as dead a global value +; with a section. Otherwise, ELF linker generation of __start_"sectionname" +; and __stop_"sectionname" symbols would not occur and we can end up +; with undefined references at link time. 
+ +; First try RegularLTO +; RUN: opt %s -o %t.o +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB +; RUN: opt %p/Inputs/global_with_section.ll -o %t2.o +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=save-temps \ +; RUN: -o %t3.o %t.o %t2.o +; Check results of internalization +; RUN: llvm-dis %t3.o.0.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK2-REGULARLTO + +; Next try ThinLTO +; RUN: opt -module-summary %s -o %t.o +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB +; RUN: opt -module-summary %p/Inputs/global_with_section.ll -o %t2.o +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=save-temps \ +; RUN: -o %t3.o %t.o %t2.o +; Check results of internalization +; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s +; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO + +; SYMTAB: deadfunc_with_section +; SYMTAB-NEXT: C identifier section name some_other_section +; SYMTAB-NEXT: deadfunc_with_nonC_section +; SYMTAB-NEXT: deadfunc2_called_from_section +; SYMTAB-NEXT: deadfunc2_called_from_nonC_section +; SYMTAB-NEXT: var_with_section +; SYMTAB-NEXT: C identifier section name some_section +; SYMTAB-NEXT: var_with_nonC_section +; Ensure we don't have a section name for the above symbol +; SYMTAB-NOT: C identifier section name + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; We should not internalize @var_with_section due to section +; CHECK-DAG: @var_with_section = global i32 0, section "some_section" +@var_with_section = global i32 0, section "some_section" + +; Confirm via a variable with a non-C identifier section that we are getting +; the expected internalization. 
+; CHECK-DAG: @var_with_nonC_section = internal global i32 0, section ".nonCsection" +@var_with_nonC_section = global i32 0, section ".nonCsection" + +; We should not internalize @deadfunc_with_section due to section +; CHECK-DAG: define void @deadfunc_with_section() section "some_other_section" +define void @deadfunc_with_section() section "some_other_section" { + call void @deadfunc2_called_from_section() + ret void +} + +; Confirm via a function with a non-C identifier section that we are getting +; the expected internalization. +; CHECK-DAG: define internal void @deadfunc_with_nonC_section() section ".nonCsection" +define void @deadfunc_with_nonC_section() section ".nonCsection" { + call void @deadfunc2_called_from_nonC_section() + ret void +} + +; In RegularLTO mode, where we have combined all the IR, +; @deadfunc2_called_from_section can be internalized. +; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_section +; In ThinLTO mode, we can't internalize it as it needs to be preserved +; (due to the access from @deadfunc_with_section which must be preserved), and +; can't be internalized since the reference is from a different module. +; CHECK2-THINLTO: define void @deadfunc2_called_from_section +declare void @deadfunc2_called_from_section() + +; Confirm when called from a function with a non-C identifier section that we +; are getting the expected internalization. 
+; CHECK2-REGULARLTO: define internal void @deadfunc2_called_from_nonC_section +; CHECK2-THINLTO: define internal void @deadfunc2_called_from_nonC_section +declare void @deadfunc2_called_from_nonC_section() Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -616,8 +616,12 @@ toString(ObjOrErr.takeError()).c_str()); unsigned SymNum = 0; + std::unique_ptr<InputFile> Input = std::move(ObjOrErr.get()); + auto InputFileSyms = Input->symbols(); + assert(InputFileSyms.size() == F.syms.size()); std::vector<SymbolResolution> Resols(F.syms.size()); for (ld_plugin_symbol &Sym : F.syms) { + const InputFile::Symbol &InpSym = InputFileSyms[SymNum]; SymbolResolution &R = Resols[SymNum++]; ld_plugin_symbol_resolution Resolution = @@ -653,6 +657,13 @@ break; } + // If the symbol has a C identifier section name, we need to mark + // it as visible to a regular object so that LTO will keep it around + // to ensure the linker generates special __start_<secname> and + // __stop_<secname> symbols which may be used elsewhere. 
+ if (!InpSym.getELFCIdentifierSectionName().empty()) + R.VisibleToRegularObj = true; + if (Resolution != LDPR_RESOLVED_DYN && Resolution != LDPR_UNDEF && (IsExecutable || !Res.DefaultVisibility)) R.FinalDefinitionInLinkageUnit = true; @@ -660,7 +671,7 @@ freeSymName(Sym); } - check(Lto.add(std::move(*ObjOrErr), Resols), + check(Lto.add(std::move(Input), Resols), std::string("Failed to link module ") + F.name); } Index: tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- tools/llvm-lto2/llvm-lto2.cpp +++ tools/llvm-lto2/llvm-lto2.cpp @@ -355,6 +355,10 @@ if (TT.isOSBinFormatCOFF() && Sym.isWeak() && Sym.isIndirect()) outs() << " fallback " << Sym.getCOFFWeakExternalFallback() << '\n'; + + if (TT.isOSBinFormatELF() && !Sym.getELFCIdentifierSectionName().empty()) + outs() << " C identifier section name " + << Sym.getELFCIdentifierSectionName() << "\n"; } outs() << '\n';