Index: include/llvm/LTO/LTO.h =================================================================== --- include/llvm/LTO/LTO.h +++ include/llvm/LTO/LTO.h @@ -126,6 +126,7 @@ using irsymtab::Symbol::getCommonSize; using irsymtab::Symbol::getCommonAlignment; using irsymtab::Symbol::getCOFFWeakExternalFallback; + using irsymtab::Symbol::hasELFCIdentifierSectionName; using irsymtab::Symbol::isExecutable; }; Index: include/llvm/Object/IRSymtab.h =================================================================== --- include/llvm/Object/IRSymtab.h +++ include/llvm/Object/IRSymtab.h @@ -121,6 +121,13 @@ /// COFF-specific: the name of the symbol that a weak external resolves to /// if not defined. Str COFFWeakExternFallbackName; + + /// ELF-specific: ELF linkers generate __start_ and __stop_ + /// symbols when there is a value in a section where the name is a + /// valid C identifier. Track symbols in such a section, so that we can + /// ensure that they aren't internalized and eliminated, which would + /// suppress the generation of the special __start_ and __stop_ symbols. + bool HasELFCIdentifierSectionName; }; struct Header { @@ -128,7 +135,7 @@ /// when the format changes, but it does not need to be incremented if a /// change to LLVM would cause it to create a different symbol table. Word Version; - enum { kCurrentVersion = 0 }; + enum { kCurrentVersion = 1 }; /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). /// Consumers should rebuild the symbol table from IR if the producer's @@ -165,6 +172,7 @@ // Copied from storage::Uncommon. uint32_t CommonSize, CommonAlign; StringRef COFFWeakExternFallbackName; + bool HasELFCIdentifierSectionName; /// Returns the mangled symbol name. 
StringRef getName() const { return Name; } @@ -215,6 +223,15 @@ assert(isWeak() && isIndirect()); return COFFWeakExternFallbackName; } + + /// ELF-specific: true if the symbol is in a section whose name is a valid + /// C identifier. ELF linkers generate __start_ and __stop_ symbols for + /// such sections, so these symbols must not be internalized or eliminated. + /// NOTE(review): this is copied from storage::Uncommon alongside + /// CommonSize; confirm it is also set for symbols with no Uncommon entry. + bool hasELFCIdentifierSectionName() const { + return HasELFCIdentifierSectionName; + } }; /// This class can be used to read a Symtab and Strtab produced by @@ -300,6 +317,7 @@ CommonSize = UncI->CommonSize; CommonAlign = UncI->CommonAlign; COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName); + HasELFCIdentifierSectionName = UncI->HasELFCIdentifierSectionName; } } Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -384,8 +384,13 @@ // Set the partition to external if we know it is re-defined by the linker // with -defsym or -wrap options, used elsewhere, e.g. it is visible to a // regular object, is referenced from llvm.compiler_used, or was already - // recorded as being referenced from a different partition. + // recorded as being referenced from a different partition. Also, + // if this is ELF and the symbol has a C identifier section name, we + // need to keep it around to ensure the linker generates special + // __start_ and __stop_ symbols which may be used + // elsewhere. if (Res.LinkerRedefined || Res.VisibleToRegularObj || Sym.isUsed() || + Sym.hasELFCIdentifierSectionName() || (GlobalRes.Partition != GlobalResolution::Unknown && GlobalRes.Partition != Partition)) { GlobalRes.Partition = GlobalResolution::External; @@ -396,7 +401,8 @@ // Flag as visible outside of summary if visible from a regular object or // from a module that does not have a summary. 
GlobalRes.VisibleOutsideSummary |= - (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary); + (Res.VisibleToRegularObj || Sym.isUsed() || !InSummary || + Sym.hasELFCIdentifierSectionName()); } } Index: lib/Object/IRSymtab.cpp =================================================================== --- lib/Object/IRSymtab.cpp +++ lib/Object/IRSymtab.cpp @@ -140,6 +140,18 @@ return Error::success(); } +static bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z') || C == '_'; +} + +static bool isAlnum(char C) { return isAlpha(C) || ('0' <= C && C <= '9'); } + +// Returns true if S is valid as a C language identifier. +static bool isValidCIdentifier(StringRef S) { + return !S.empty() && isAlpha(S[0]) && + std::all_of(S.begin() + 1, S.end(), isAlnum); +} + Error Builder::addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Msym) { @@ -240,6 +252,10 @@ } } + if (TT.isOSBinFormatELF()) + Uncommon().HasELFCIdentifierSectionName = + isValidCIdentifier(Base->getSection()); + return Error::success(); } Index: test/Object/X86/irsymtab.ll =================================================================== --- test/Object/X86/irsymtab.ll +++ test/Object/X86/irsymtab.ll @@ -9,13 +9,13 @@ ; BCA: blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00' +; BCA-NEXT: blob data = 
'\x01\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x02\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x04$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x0C$\x00\x00\x00\x00\x00\x00\x00\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00' ; BCA-NEXT: ; BCA-NEXT: blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll' ; BCA-NEXT: -; SYMTAB: version: 0 +; SYMTAB: version: 1 ; SYMTAB-NEXT: producer: producer ; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu ; SYMTAB-NEXT: source filename: irsymtab.ll Index: test/tools/gold/X86/Inputs/global_with_section.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/Inputs/global_with_section.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @deadfunc2() { + ret void +} Index: test/tools/gold/X86/global_with_section.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/global_with_section.ll @@ -0,0 +1,59 @@ +; Test to ensure we don't internalize or treat as dead a global value +; with a section. Otherwise, ELF linker generation of __start_"sectionname" +; and __stop_"sectionname" symbols would not occur and we can end up +; with undefined references at link time. 
+ +; RUN: opt %s -o %t.o +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB +; RUN: opt %p/Inputs/global_with_section.ll -o %t2.o +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -u foo \ +; RUN: --plugin-opt=save-temps \ +; RUN: -o %t3.o %t.o %t2.o +; Check results of internalization +; RUN: llvm-dis %t3.o.0.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-REGULARLTO + +; Do setup work for all below tests: generate bitcode +; RUN: opt -module-summary %s -o %t.o +; RUN: llvm-lto2 dump-symtab %t.o | FileCheck %s --check-prefix=SYMTAB +; RUN: opt -module-summary %p/Inputs/global_with_section.ll -o %t2.o + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -u foo \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=save-temps \ +; RUN: -o %t3.o %t.o %t2.o +; Check results of internalization +; RUN: llvm-dis %t.o.2.internalize.bc -o - | FileCheck %s +; RUN: llvm-dis %t2.o.2.internalize.bc -o - | FileCheck %s --check-prefix=CHECK2-THINLTO + +; SYMTAB: deadfunc +; SYMTAB-NEXT: has C identifier section name +; SYMTAB-NEXT: deadfunc2 +; SYMTAB-NEXT: var_with_section +; SYMTAB-NEXT: has C identifier section name + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; We should not internalize @var_with_section due to section +; CHECK-DAG: @var_with_section = global i32 0, section "some_section" +@var_with_section = global i32 0, section "some_section" + +; We should not internalize @deadfunc due to section +; CHECK-DAG: define void @deadfunc() section "some_other_section" +define void @deadfunc() section "some_other_section" { + call void @deadfunc2() + ret void +} + +; In RegularLTO mode, where we have combined all the IR, @deadfunc2 +; should still exist but would be internalized. 
+; CHECK2-REGULARLTO: define internal void @deadfunc2 +; In ThinLTO mode, we can't internalize it as it needs to be preserved +; (due to the access from @deadfunc which must be preserved), and +; can't be internalized since the reference is from a different module. +; CHECK2-THINLTO: define void @deadfunc2 +declare void @deadfunc2() + +@foo = global i32 0 Index: tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- tools/llvm-lto2/llvm-lto2.cpp +++ tools/llvm-lto2/llvm-lto2.cpp @@ -355,6 +355,9 @@ if (TT.isOSBinFormatCOFF() && Sym.isWeak() && Sym.isIndirect()) outs() << " fallback " << Sym.getCOFFWeakExternalFallback() << '\n'; + + if (TT.isOSBinFormatELF() && Sym.hasELFCIdentifierSectionName()) + outs() << " has C identifier section name\n"; } outs() << '\n';