diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -286,22 +286,12 @@ print(Out, Kind, Val); } -static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, - bool Deterministic, ArrayRef Members, - StringRef StringTable) { - // We don't write a symbol table on an archive with no members -- except on - // Darwin, where the linker will abort unless the archive has a symbol table. - if (StringTable.empty() && !isDarwin(Kind)) - return; - - unsigned NumSyms = 0; - for (const MemberData &M : Members) - NumSyms += M.Symbols.size(); - - unsigned Size = 0; - unsigned OffsetSize = is64BitKind(Kind) ? sizeof(uint64_t) : sizeof(uint32_t); - - Size += OffsetSize; // Number of entries +static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, + uint64_t NumSyms, uint64_t OffsetSize, + StringRef StringTable, + uint32_t *Padding = nullptr) { + assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); + uint64_t Size = OffsetSize; // Number of entries if (isBSDLike(Kind)) Size += NumSyms * OffsetSize * 2; // Table else @@ -313,10 +303,15 @@ // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. // We do this for all bsd formats because it simplifies aligning members. - const Align Alignment(isBSDLike(Kind) ? 8 : 2); - unsigned Pad = offsetToAlignment(Size, Alignment); + uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); Size += Pad; + if (Padding) + *Padding = Pad; + return Size; +} +static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, uint64_t Size) { if (isBSDLike(Kind)) { const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF"; printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0, @@ -325,6 +320,24 @@ const char *Name = is64BitKind(Kind) ? "/SYM64" : ""; printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size); } +} + +static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, ArrayRef Members, + StringRef StringTable) { + // We don't write a symbol table on an archive with no members -- except on + // Darwin, where the linker will abort unless the archive has a symbol table. + if (StringTable.empty() && !isDarwin(Kind)) + return; + + unsigned NumSyms = 0; + for (const MemberData &M : Members) + NumSyms += M.Symbols.size(); + + uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; + uint32_t Pad; + uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); + writeSymbolTableHeader(Out, Kind, Deterministic, Size); uint64_t Pos = Out.tell() + Size; @@ -579,17 +592,28 @@ // We would like to detect if we need to switch to a 64-bit symbol table. if (WriteSymtab) { - uint64_t MaxOffset = 0; + uint64_t MaxOffset = 8; // For the file signature. uint64_t LastOffset = MaxOffset; + uint64_t NumSyms = 0; for (const auto &M : Data) { // Record the start of the member's offset LastOffset = MaxOffset; // Account for the size of each part associated with the member. MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size(); - // We assume 32-bit symbols to see if 32-bit symbols are possible or not. - MaxOffset += M.Symbols.size() * 4; + NumSyms += M.Symbols.size(); } + // We assume 32-bit offsets to see if 32-bit symbols are possible or not. + uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); + auto computeSymbolTableHeaderSize = + [=] { + SmallString<0> TmpBuf; + raw_svector_ostream Tmp(TmpBuf); + writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); + return TmpBuf.size(); + }; + LastOffset += computeSymbolTableHeaderSize() + SymtabSize; + // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by // writeArchive. To test this we need to generate a file with a member that @@ -597,15 +621,15 @@ // speed the test up we use this environment variable to pretend like the // cutoff happens before 32-bits and instead happens at some much smaller // value. + uint64_t Sym64Threshold = 1ULL << 32; const char *Sym64Env = std::getenv("SYM64_THRESHOLD"); - int Sym64Threshold = 32; if (Sym64Env) StringRef(Sym64Env).getAsInteger(10, Sym64Threshold); // If LastOffset isn't going to fit in a 32-bit varible we need to switch // to 64-bit. Note that the file can be larger than 4GB as long as the last // member starts before the 4GB offset. - if (LastOffset >= (1ULL << Sym64Threshold)) { + if (LastOffset >= Sym64Threshold) { if (Kind == object::Archive::K_DARWIN) Kind = object::Archive::K_DARWIN64; else diff --git a/llvm/test/Object/archive-symtab.test b/llvm/test/Object/archive-symtab.test --- a/llvm/test/Object/archive-symtab.test +++ b/llvm/test/Object/archive-symtab.test @@ -51,9 +51,14 @@ # RUN: llvm-nm -M %t.a | FileCheck %s # RUN: rm -f %t.a -# RUN: env SYM64_THRESHOLD=1 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64 +# RUN: env SYM64_THRESHOLD=836 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64 # RUN: llvm-nm -M %t.a | FileCheck %s -# RUXX: grep SYM64 %t.a +# RUN: grep '/SYM64/' %t.a + +# RUN: rm -f %t.a +# RUN: env SYM64_THRESHOLD=837 llvm-ar rcsU %t.a %t.elf-x86-64 %t2.elf-x86-64 +# RUN: llvm-nm -M %t.a | FileCheck %s +# RUN: not grep '/SYM64/' %t.a # CHECK: Archive map # CHECK-NEXT: main in {{.*}}.elf-x86-64 @@ -136,10 +141,15 @@ # RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s # RUN: rm -f %t.a -# RUN: env SYM64_THRESHOLD=1 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64 +# RUN: env SYM64_THRESHOLD=784 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64 # RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s # RUN: grep '__\.SYMDEF_64' %t.a +# RUN: rm -f %t.a +# RUN: env SYM64_THRESHOLD=785 llvm-ar --format=darwin rcsU %t.a %p/Inputs/trivial-object-test.macho-x86-64 %p/Inputs/trivial-object-test2.macho-x86-64 +# RUN: llvm-nm -M %t.a | FileCheck --check-prefix=MACHO %s +# RUN: not grep '__\.SYMDEF_64' %t.a + # MACHO: Archive map # MACHO-NEXT: _main in trivial-object-test.macho-x86-64 # MACHO-NEXT: _foo in trivial-object-test2.macho-x86-64