diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -331,6 +331,8 @@ std::string Header; StringRef Data; StringRef Padding; + uint64_t PreHeadPadSize = 0; + std::unique_ptr SymFile = nullptr; }; } // namespace @@ -496,21 +498,66 @@ } } -static Expected is64BitSymbolicFile(const StringRef &ObjStringRef) { - MemoryBufferRef ObjMbf(ObjStringRef, ""); - // In the scenario when LLVMContext is populated SymbolicFile will contain a - // reference to it, thus SymbolicFile should be destroyed first. - LLVMContext Context; - Expected> ObjOrErr = - getSymbolicFile(ObjMbf, Context); - if (!ObjOrErr) - return ObjOrErr.takeError(); +static bool is64BitSymbolicFile(const SymbolicFile *SymObj) { + return SymObj != nullptr ? SymObj->is64Bit() : false; +} - // Treat non-symbolic file types as not 64-bits. - if (!*ObjOrErr) - return false; +// Log2 of PAGESIZE(4096) on an AIX system. +static const uint32_t Log2OfAIXPageSize = 12; + +// In the AIX big archive format, since the data content follows the member file +// name, if the name ends on an odd byte, an extra byte will be added for +// padding. This ensures that the data within the member file starts at an even +// byte. +static const uint32_t MinBigArchiveMemDataAlign = 2; + +template +uint16_t getAuxMaxAlignment(uint16_t AuxHeaderSize, AuxiliaryHeader *AuxHeader, + uint16_t Log2OfMaxAlign) { + // If the member doesn't have an auxiliary header, it isn't a loadable object + // and so it just needs aligning at the minimum value. + if (AuxHeader == nullptr) + return MinBigArchiveMemDataAlign; + + // If the auxiliary header does not have both MaxAlignOfData and + // MaxAlignOfText field, it is not a loadable shared object file, so align at + // the minimum value. The 'ModuleType' member is located right after + // 'MaxAlignOfData' in the AuxiliaryHeader. 
+ if (AuxHeaderSize < offsetof(AuxiliaryHeader, ModuleType)) + return MinBigArchiveMemDataAlign; + + // If the XCOFF object file does not have a loader section, it is not + // loadable, so align at the minimum value. + if (AuxHeader->SecNumOfLoader == 0) + return MinBigArchiveMemDataAlign; + + // The content of the loadable member file needs to be aligned at MAX(maximum + // alignment of .text, maximum alignment of .data) if there are both fields. + // If the desired alignment is > PAGESIZE, 32-bit members are aligned on a + // word boundary, while 64-bit members are aligned on a PAGESIZE(2^12=4096) + // boundary. + uint16_t Log2OfAlign = + std::max(AuxHeader->MaxAlignOfText, AuxHeader->MaxAlignOfData); + return 1 << (Log2OfAlign > Log2OfAIXPageSize ? Log2OfMaxAlign : Log2OfAlign); +} - return (*ObjOrErr)->is64Bit(); +// AIX big archives may contain shared object members. The AIX OS requires these +// members to be aligned if they are 64-bit and recommends it for 32-bit +// members. This ensures that when these members are loaded they are aligned in +// memory. +static uint32_t getMemberAlignment(SymbolicFile *SymObj) { + XCOFFObjectFile *XCOFFObj = dyn_cast_or_null(SymObj); + if (!XCOFFObj) + return MinBigArchiveMemDataAlign; + + // If the desired alignment is > PAGESIZE, 32-bit members are aligned on a + // word boundary, while 64-bit members are aligned on a PAGESIZE boundary. + return XCOFFObj->is64Bit() + ? 
getAuxMaxAlignment(XCOFFObj->fileHeader64()->AuxHeaderSize, + XCOFFObj->auxiliaryHeader64(), + Log2OfAIXPageSize) + : getAuxMaxAlignment(XCOFFObj->fileHeader32()->AuxHeaderSize, + XCOFFObj->auxiliaryHeader32(), 2); } static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, @@ -539,13 +586,8 @@ uint64_t Pos = MembersOffset; for (const MemberData &M : Members) { if (isAIXBigArchive(Kind)) { - Expected Is64BitOrErr = is64BitSymbolicFile(M.Data); - // If there is an error, the error will have been emitted when - // 'computeMemberData' called the 'getSymbol' function, so don't need to - // handle it here. - if (!Is64BitOrErr) - cantFail(Is64BitOrErr.takeError()); - if (*Is64BitOrErr != Is64Bit) { + Pos += M.PreHeadPadSize; + if (is64BitSymbolicFile(M.SymFile.get()) != Is64Bit) { Pos += M.Header.size() + M.Data.size() + M.Padding.size(); continue; } @@ -629,29 +671,19 @@ return false; } -static Expected> -getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames, - SymMap *SymMap, bool &HasObject) { - // In the scenario when LLVMContext is populated SymbolicFile will contain a - // reference to it, thus SymbolicFile should be destroyed first. - LLVMContext Context; - +static Expected> getSymbols(SymbolicFile *Obj, + uint16_t Index, + raw_ostream &SymNames, + SymMap *SymMap) { std::vector Ret; - Expected> ObjOrErr = - getSymbolicFile(Buf, Context); - if (!ObjOrErr) - return ObjOrErr.takeError(); - // If the member is non-symbolic file, treat it as having no symbols. - if (!*ObjOrErr) + if (Obj == nullptr) return Ret; - std::unique_ptr Obj = std::move(*ObjOrErr); - std::map *Map = nullptr; if (SymMap) Map = SymMap->UseECMap && isECObject(*Obj) ? 
&SymMap->ECMap : &SymMap->Map; - HasObject = true; + for (const object::BasicSymbolRef &S : Obj->symbols()) { if (!isArchiveSymbol(S)) continue; @@ -681,9 +713,9 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, bool Deterministic, SymtabWritingMode NeedSymbols, SymMap *SymMap, - ArrayRef NewMembers) { + LLVMContext &Context, ArrayRef NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; - + uint64_t MemHeadPadSize = 0; uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 0; @@ -748,12 +780,16 @@ // The big archive format needs to know the offset of the previous member // header. uint64_t PrevOffset = 0; + uint64_t NextMemHeadPadSize = 0; + std::unique_ptr CurSymFile; + std::unique_ptr NextSymFile; uint16_t Index = 0; - for (const NewArchiveMember &M : NewMembers) { + + for (auto M = NewMembers.begin(); M < NewMembers.end(); ++M) { std::string Header; raw_string_ostream Out(Header); - MemoryBufferRef Buf = M.Buf->getMemBufferRef(); + MemoryBufferRef Buf = M->Buf->getMemBufferRef(); StringRef Data = Thin ? "" : Buf.getBuffer(); Index++; @@ -771,26 +807,76 @@ sys::TimePoint ModTime; if (UniqueTimestamps) // Increment timestamp for each file of a given name. 
- ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++); + ModTime = sys::toTimePoint(FilenameCount[M->MemberName]++); else - ModTime = M.ModTime; + ModTime = M->ModTime; uint64_t Size = Buf.getBufferSize() + MemberPadding; if (Size > object::Archive::MaxMemberSize) { std::string StringMsg = - "File " + M.MemberName.str() + " exceeds size limit"; + "File " + M->MemberName.str() + " exceeds size limit"; return make_error( std::move(StringMsg), object::object_error::parse_failed); } + if (NeedSymbols != SymtabWritingMode::NoSymtab || isAIXBigArchive(Kind)) { + auto SetNextSymFile = [&NextSymFile, + &Context](MemoryBufferRef Buf, + StringRef MemberName) -> Error { + Expected> SymFileOrErr = + getSymbolicFile(Buf, Context); + if (!SymFileOrErr) + return createFileError(MemberName, SymFileOrErr.takeError()); + NextSymFile = std::move(*SymFileOrErr); + return Error::success(); + }; + + if (M == NewMembers.begin()) + if (Error Err = SetNextSymFile(Buf, M->MemberName)) + return std::move(Err); + + CurSymFile = std::move(NextSymFile); + + if ((M + 1) != NewMembers.end()) + if (Error Err = SetNextSymFile((M + 1)->Buf->getMemBufferRef(), + (M + 1)->MemberName)) + return std::move(Err); + } + + // In the big archive file format, we need to calculate and include the next + // member offset and previous member offset in the file member header. 
if (isAIXBigArchive(Kind)) { + uint64_t OffsetToMemData = Pos + sizeof(object::BigArMemHdrType) + + alignTo(M->MemberName.size(), 2); + + if (M == NewMembers.begin()) + NextMemHeadPadSize = + alignToPowerOf2(OffsetToMemData, + getMemberAlignment(CurSymFile.get())) - + OffsetToMemData; + + MemHeadPadSize = NextMemHeadPadSize; + Pos += MemHeadPadSize; uint64_t NextOffset = Pos + sizeof(object::BigArMemHdrType) + - alignTo(M.MemberName.size(), 2) + alignTo(Size, 2); - printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID, - M.Perms, Size, PrevOffset, NextOffset); + alignTo(M->MemberName.size(), 2) + alignTo(Size, 2); + + // If there is another member file after this, we need to calculate the + // padding before the header. + if ((M + 1) != NewMembers.end()) { + uint64_t OffsetToNextMemData = NextOffset + + sizeof(object::BigArMemHdrType) + + alignTo((M + 1)->MemberName.size(), 2); + NextMemHeadPadSize = + alignToPowerOf2(OffsetToNextMemData, + getMemberAlignment(NextSymFile.get())) - + OffsetToNextMemData; + NextOffset += NextMemHeadPadSize; + } + printBigArchiveMemberHeader(Out, M->MemberName, ModTime, M->UID, M->GID, + M->Perms, Size, PrevOffset, NextOffset); PrevOffset = Pos; } else { - printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M, + printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, *M, ModTime, Size); } Out.flush(); @@ -798,14 +884,17 @@ std::vector Symbols; if (NeedSymbols != SymtabWritingMode::NoSymtab) { Expected> SymbolsOrErr = - getSymbols(Buf, Index, SymNames, SymMap, HasObject); + getSymbols(CurSymFile.get(), Index, SymNames, SymMap); if (!SymbolsOrErr) - return createFileError(M.MemberName, SymbolsOrErr.takeError()); + return createFileError(M->MemberName, SymbolsOrErr.takeError()); Symbols = std::move(*SymbolsOrErr); + if (CurSymFile) + HasObject = true; } Pos += Header.size() + Data.size() + Padding.size(); - Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding}); + 
Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding, + MemHeadPadSize, std::move(CurSymFile)}); } // If there are no symbols, emit an empty symbol table, to satisfy Solaris // tools, older versions of which expect a symbol table in a non-empty @@ -876,10 +965,14 @@ if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) Kind = object::Archive::K_GNU; + // In the scenario when LLVMContext is populated SymbolicFile will contain a + // reference to it, thus SymbolicFile should be destroyed first. + LLVMContext Context; + SymMap.UseECMap = IsEC; Expected> DataOrErr = computeMemberData( StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, - isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers); + isCOFFArchive(Kind) ? &SymMap : nullptr, Context, NewMembers); if (Error E = DataOrErr.takeError()) return E; std::vector &Data = *DataOrErr; @@ -902,6 +995,7 @@ for (const auto &M : Data) { // Record the start of the member's offset + LastMemberEndOffset += M.PreHeadPadSize; LastMemberHeaderOffset = LastMemberEndOffset; // Account for the size of each part associated with the member. LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size(); @@ -913,13 +1007,9 @@ // members. As a big archive can have both 32-bit and 64-bit file members, // we need to know the number of symbols in each symbol table individually. 
if (isAIXBigArchive(Kind) && ShouldWriteSymtab) { - Expected Is64BitOrErr = is64BitSymbolicFile(M.Data); - if (Error E = Is64BitOrErr.takeError()) - return E; - - if (!*Is64BitOrErr) - NumSyms32 += M.Symbols.size(); - } + if (!is64BitSymbolicFile(M.SymFile.get())) + NumSyms32 += M.Symbols.size(); + } } std::optional HeadersSize; @@ -1000,13 +1090,14 @@ for (size_t I = 0, Size = NewMembers.size(); I != Size; ++I) { const NewArchiveMember &Member = NewMembers[I]; MemberTableNameStrTblSize += Member.MemberName.size() + 1; + MemberEndOffset += Data[I].PreHeadPadSize; MemberOffsets.push_back(MemberEndOffset); MemberNames.push_back(Member.MemberName); // File member name ended with "`\n". The length is included in // BigArMemHdrType. MemberEndOffset += sizeof(object::BigArMemHdrType) + - alignTo(Data[I].Data.size(), 2) + - alignTo(Member.MemberName.size(), 2); + alignTo(Data[I].Data.size(), 2) + + alignTo(Member.MemberName.size(), 2); } // AIX member table size. @@ -1021,16 +1112,11 @@ if (ShouldWriteSymtab && NumSyms) // Generate the symbol names for the members. - for (const NewArchiveMember &M : NewMembers) { - MemoryBufferRef Buf = M.Buf->getMemBufferRef(); - Expected Is64BitOrErr = is64BitSymbolicFile(Buf.getBuffer()); - if (!Is64BitOrErr) - return Is64BitOrErr.takeError(); - - bool HasObject; - Expected> SymbolsOrErr = - getSymbols(Buf, 0, *Is64BitOrErr ? SymNames64 : SymNames32, nullptr, - HasObject); + for (const auto &M : Data) { + Expected> SymbolsOrErr = getSymbols( + M.SymFile.get(), 0, + is64BitSymbolicFile(M.SymFile.get()) ? SymNames64 : SymNames32, + nullptr); if (!SymbolsOrErr) return SymbolsOrErr.takeError(); } @@ -1069,9 +1155,12 @@ // symbol table. printWithSpacePadding(Out, GlobalSymbolOffset, 20); printWithSpacePadding(Out, GlobalSymbolOffset64, 20); - printWithSpacePadding( - Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0, - 20); // Offset to first archive member + printWithSpacePadding(Out, + NewMembers.size() + ? 
sizeof(object::BigArchive::FixLenHdr) + + Data[0].PreHeadPadSize + : 0, + 20); // Offset to first archive member printWithSpacePadding(Out, NewMembers.size() ? LastMemberHeaderOffset : 0, 20); // Offset to last archive member printWithSpacePadding( @@ -1079,6 +1168,7 @@ 20); // Offset to first member of free list - Not supported yet for (const MemberData &M : Data) { + Out << std::string(M.PreHeadPadSize, '\0'); Out << M.Header << M.Data; if (M.Data.size() % 2) Out << '\0'; diff --git a/llvm/test/Object/archive-malformed-object.test b/llvm/test/Object/archive-malformed-object.test --- a/llvm/test/Object/archive-malformed-object.test +++ b/llvm/test/Object/archive-malformed-object.test @@ -18,8 +18,8 @@ # ERR2: error: bad.a: 'input.o': section header table goes past the end of the file: e_shoff = 0x9999 -## Don't emit an error if the symbol table is not required. -# RUN: llvm-ar rcS good.a input.o input.bc +## Don't emit an error if the symbol table is not required for gnu format. +# RUN: llvm-ar --format=gnu rcS good.a input.o input.bc # RUN: llvm-ar t good.a | FileCheck %s --check-prefix=CONTENTS # CONTENTS: input.o diff --git a/llvm/test/tools/llvm-ar/big-archive-xcoff-align.test b/llvm/test/tools/llvm-ar/big-archive-xcoff-align.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ar/big-archive-xcoff-align.test @@ -0,0 +1,113 @@ +## Test the alignment of XCOFF object files in the big archive format. 
+ +# RUN: rm -rf %t && mkdir %t +# RUN: cd %t + +# RUN: yaml2obj --docnum=1 -DFLAG=0x1DF %s -o t32_1.o +# RUN: yaml2obj --docnum=1 -DFLAG=0x1F7 %s -o t64_1.o +# RUN: yaml2obj --docnum=2 -DFLAG=0x1DF %s -o t32_2.o +# RUN: yaml2obj --docnum=2 -DFLAG=0x1F7 %s -o t64_2.o +# RUN: yaml2obj --docnum=3 -DFLAG=0x1DF %s -o t32_nomaxdata_text.o +# RUN: yaml2obj --docnum=3 -DFLAG=0x1F7 %s -o t64_nomaxdata_text.o +# RUN: yaml2obj --docnum=4 -DFLAG=0x1DF %s -o t32_maxdata_text.o +# RUN: yaml2obj --docnum=4 -DFLAG=0x1F7 %s -o t64_maxdata_text.o +# RUN: yaml2obj --docnum=5 -DFLAG=0x1DF %s -o t32_noloader.o +# RUN: yaml2obj --docnum=5 -DFLAG=0x1F7 %s -o t64_noloader.o +# RUN: yaml2obj --docnum=6 -DFLAG=0x1DF %s -o t32_excess.o +# RUN: yaml2obj --docnum=6 -DFLAG=0x1F7 %s -o t64_excess.o + +## Test that the content of an XCOFF object file, which has an auxiliary header, +## is aligned in a big archive based on the content of the auxiliary header. +# RUN: env OBJECT_MODE=32_64 llvm-ar -q t_aux.a t32_nomaxdata_text.o t64_nomaxdata_text.o t32_maxdata_text.o t64_maxdata_text.o t32_noloader.o t64_noloader.o t32_excess.o t64_excess.o +## The content of t32_nomaxdata_text.o and t64_nomaxdata_text.o is aligned at 2. +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(262);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC32 %s +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(528);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC64 %s +## The content of t32_maxdata_text.o and t64_maxdata_text.o is aligned at 2^8. +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(1024);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC32 %s +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(1536);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC64 %s +## The content of t32_noloader.o and t64_noloader.o is aligned at 2. 
+# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(1870);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC32 %s +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(2130);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC64 %s +## The content of t32_excess.o is aligned on a word (4-byte) boundary. +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(2464);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC32 %s +## The content of t64_excess.o is aligned at 2^12. +# RUN: %python -c 'f=open("t_aux.a","rb");f.seek(4096);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC64 %s + +## Test that the content of XCOFF object files that do not have an auxiliary +## header is aligned at 2 in a big archive. +# RUN: env OBJECT_MODE=32_64 llvm-ar -q t3.a t32_1.o t64_1.o t32_2.o t64_2.o +# RUN: %python -c 'f=open("t3.a","rb");f.seek(250);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC32 %s +# RUN: %python -c 'f=open("t3.a","rb");f.seek(432);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC64 %s +# RUN: %python -c 'f=open("t3.a","rb");f.seek(650);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC32 %s +# RUN: %python -c 'f=open("t3.a","rb");f.seek(832);print(f.read(2));f.close()' | FileCheck --check-prefix=MAGIC64 %s + +# MAGIC64: b'\x01\xf7' +# MAGIC32: b'\x01\xdf' + +--- !XCOFF +FileHeader: + MagicNumber: [[FLAG]] +Sections: + - Name: .data + Flags: [ STYP_DATA ] + +--- !XCOFF +FileHeader: + MagicNumber: [[FLAG]] +Sections: + - Name: .text + Flags: [ STYP_DATA ] + +## The auxiliary header has neither the MaxAlignOfData nor the MaxAlignOfText field. +--- !XCOFF +FileHeader: + MagicNumber: [[FLAG]] + AuxiliaryHeaderSize: 12 +AuxiliaryHeader: + Magic: 0x10B + SecNumOfLoader: 1 +Sections: + - Name: .text + Flags: [ STYP_DATA ] + +## The auxiliary header has both the MaxAlignOfData and MaxAlignOfText fields. 
+--- !XCOFF +FileHeader: + MagicNumber: [[FLAG]] + AuxiliaryHeaderSize: 48 +AuxiliaryHeader: + Magic: 0x10B + SecNumOfLoader: 1 + MaxAlignOfText: 6 + MaxAlignOfData: 8 +Sections: + - Name: .text + Flags: [ STYP_DATA ] + +## The object file does not have a loader section (SecNumOfLoader is 0). +--- !XCOFF +FileHeader: + MagicNumber: [[FLAG]] + AuxiliaryHeaderSize: 34 +AuxiliaryHeader: + Magic: 0x10B + SecNumOfLoader: 0 + MaxAlignOfText: 14 + MaxAlignOfData: 8 +Sections: + - Name: .text + Flags: [ STYP_DATA ] + +## The auxiliary header has both the MaxAlignOfData and MaxAlignOfText fields, but the requested alignment exceeds the page size. +--- !XCOFF +FileHeader: + MagicNumber: [[FLAG]] + AuxiliaryHeaderSize: 48 +AuxiliaryHeader: + Magic: 0x10B + SecNumOfLoader: 1 + MaxAlignOfText: 14 + MaxAlignOfData: 8 +Sections: + - Name: .text + Flags: [ STYP_DATA ]