diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h --- a/llvm/include/llvm/Object/Archive.h +++ b/llvm/include/llvm/Object/Archive.h @@ -34,38 +34,56 @@ class Archive; -class ArchiveMemberHeader { +class AbstractArchiveMemberHeader { public: friend class Archive; - - ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, - uint64_t Size, Error *Err); - // ArchiveMemberHeader() = default; - /// Get the name without looking up long names. - Expected getRawName() const; + virtual Expected getRawName() const = 0; /// Get the name looking up long names. - Expected getName(uint64_t Size) const; + virtual Expected getName(uint64_t Size) const = 0; + + virtual Expected getSize() const = 0; - Expected getSize() const; + virtual uint64_t getOffset() const = 0; + virtual StringRef getRawAccessMode() const = 0; + virtual StringRef getRawLastModified() const = 0; + virtual StringRef getRawUID() const = 0; + virtual StringRef getRawGID() const = 0; Expected getAccessMode() const; Expected> getLastModified() const; - - StringRef getRawLastModified() const { - return StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(' '); - } - Expected getUID() const; Expected getGID() const; // This returns the size of the private struct ArMemHdrType - uint64_t getSizeOf() const { + virtual uint64_t getSizeOf() const = 0; + virtual uint64_t getFixSizeOf() const = 0; +}; + +class ArchiveMemberHeader : public AbstractArchiveMemberHeader { +public: + ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, + uint64_t Size, Error *Err); + + Expected getRawName() const override; + Expected getName(uint64_t Size) const override; + Expected getSize() const override; + + uint64_t getOffset() const override; + StringRef getRawAccessMode() const override; + StringRef getRawLastModified() const override; + StringRef getRawUID() const override; + StringRef getRawGID() const override; + + uint64_t getSizeOf() const override { return sizeof(ArMemHdrType); } + uint64_t getFixSizeOf() const override { + return 0; + } + private: struct ArMemHdrType { char Name[16]; @@ -76,8 +94,64 @@ char Size[10]; ///< Size of data, not including header or padding. char Terminator[2]; }; + ArMemHdrType const *ArMemHdr; Archive const *Parent; +}; + +class BigArchiveMemberHeader : public AbstractArchiveMemberHeader { +public: + BigArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, + uint64_t Size, Error *Err); + + Expected getRawName() const override; + Expected getName(uint64_t Size) const override; + Expected getSize() const override; + + uint64_t getOffset() const override; + StringRef getRawAccessMode() const override; + StringRef getRawLastModified() const override; + StringRef getRawUID() const override; + StringRef getRawGID() const override; + + // This returns the size of the private struct ArMemHdrType + uint64_t getSizeOf() const override { + return sizeof(ArMemHdrType); + } + + uint64_t getFixSizeOf() const override { + return sizeof(ArFixLenHdrType); + } + +private: + // File Member Header + struct ArMemHdrType { + char Size[20]; + char NextOffset[20]; + char PrevOffset[20]; + char LastModified[12]; + char UID[12]; + char GID[12]; + char AccessMode[12]; + char NameLen[4]; + union { + char Name[2]; + char Terminator[2]; + }; + }; + + // AIX Fixed-Length Header (without magic) + struct ArFixLenHdrType { + char MemOffset[20]; /*Offset to member table */ + char GlobSymOffset[20]; /*Offset to global symbol table */ + char GlobSym64Offset[20]; /*Offset global symbol table for 64-bit objects */ + char FirstArOffset[20]; /*Offset to first archive member */ + char LastArOffset[20]; /*Offset to last archive member */ + char FreeOffset[20]; /*Offset to first mem on free list */ + }; + ArMemHdrType const *ArMemHdr; + ArFixLenHdrType const *ArFixLenHdr; + Archive const *Parent; }; class Archive : public Binary { @@ -86,10 +160,10 @@ public: class Child { friend Archive; - friend ArchiveMemberHeader; + friend AbstractArchiveMemberHeader; const Archive *Parent; - ArchiveMemberHeader Header; + AbstractArchiveMemberHeader *Header; /// Includes header but not padding byte. StringRef Data; /// Offset from Data to the start of the file. @@ -111,21 +185,21 @@ Expected getName() const; Expected getFullName() const; - Expected getRawName() const { return Header.getRawName(); } + Expected getRawName() const { return Header->getRawName(); } Expected> getLastModified() const { - return Header.getLastModified(); + return Header->getLastModified(); } StringRef getRawLastModified() const { - return Header.getRawLastModified(); + return Header->getRawLastModified(); } - Expected getUID() const { return Header.getUID(); } - Expected getGID() const { return Header.getGID(); } + Expected getUID() const { return Header->getUID(); } + Expected getGID() const { return Header->getGID(); } Expected getAccessMode() const { - return Header.getAccessMode(); + return Header->getAccessMode(); } /// \return the size of the archive member without the header or padding. @@ -230,7 +304,8 @@ K_BSD, K_DARWIN, K_DARWIN64, - K_COFF + K_COFF, + K_XCOFF }; Kind kind() const { return (Kind)Format; } @@ -267,6 +342,14 @@ return std::move(ThinBuffers); } + // Total length is needed, because end of file is member table and + // global symbol table. + uint32_t Length = 0; + // All offset are global offset. So, we need to memorize position. + static uint32_t CurrentLocation; + // Fixed length header is treated differently + static uint32_t fixLengthHeader; + private: StringRef SymbolTable; StringRef StringTable; diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp --- a/llvm/lib/Object/Archive.cpp +++ b/llvm/lib/Object/Archive.cpp @@ -40,6 +40,13 @@ const char Magic[] = "!\n"; const char ThinMagic[] = "!\n"; +const char BigMagic[] = "\n"; + +// All magic are 8 caractere long +#define MAGIC_LEN 8 + +uint32_t Archive::CurrentLocation = 0; +uint32_t Archive::fixLengthHeader = 2; void Archive::anchor() {} @@ -53,12 +60,13 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, const char *RawHeaderPtr, uint64_t Size, Error *Err) - : Parent(Parent), - ArMemHdr(reinterpret_cast(RawHeaderPtr)) { + : Parent(Parent) { if (RawHeaderPtr == nullptr) return; ErrorAsOutParameter ErrAsOutParam(Err); + ArMemHdr = reinterpret_cast(RawHeaderPtr); + if (Size < sizeof(ArMemHdrType)) { if (Err) { std::string Msg("remaining size of archive too small for next archive " @@ -95,6 +103,44 @@ } } +BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : Parent(Parent) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + if (Parent->fixLengthHeader) { + // AIX big archive Fixed-Length Header + ArFixLenHdr = reinterpret_cast(RawHeaderPtr); + --Archive::fixLengthHeader; + // We want File member archive only for ArMemHdr. + const char *RawMemberHeaderPtr = RawHeaderPtr + sizeof(ArFixLenHdrType); + ArMemHdr = reinterpret_cast(RawMemberHeaderPtr); + } else { + // AIX without Fixed Size Header. + ArMemHdr = reinterpret_cast(RawHeaderPtr); + } + + if (Size < sizeof(ArMemHdrType)) { + if (Err) { + std::string Msg("remaining size of archive too small for next archive " + "member header "); + Expected NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } + + // Terminator is cosmetic only for big archive +} + // This gets the raw name from the ArMemHdr->Name field and checks that it is // valid for the kind of archive. If it is not valid it returns an Error. Expected ArchiveMemberHeader::getRawName() const { @@ -123,6 +169,15 @@ } // This gets the name looking up long names. Size is the size of the archive +Expected BigArchiveMemberHeader::getRawName() const { + // Name is outside ArMemHdr, and there is no end caracter + // name lenght is in NameLen field + // The two first char of name are already in ArMemHdrType + // but unused terminator '`\n' is after the name. + StringRef::size_type end = strtol(ArMemHdr->NameLen, NULL, 10); + return StringRef(ArMemHdr->Name, end); +} + // member including the header, so the size of any name following the header // is checked to make sure it does not overflow. Expected ArchiveMemberHeader::getName(uint64_t Size) const { @@ -223,10 +278,23 @@ return Name.drop_back(1); } +Expected BigArchiveMemberHeader::getName(uint64_t Size) const { + // Size check is different with Big Archive TODO + + // The raw name itself can be invalid. + Expected NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + + // Trim the blanks at the end of the name. + return Name.rtrim(' '); +} + Expected ArchiveMemberHeader::getSize() const { - uint64_t Ret; + uint64_t Size; if (StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Size)) { std::string Buf; raw_string_ostream OS(Buf); OS.write_escaped(StringRef(ArMemHdr->Size, @@ -238,20 +306,65 @@ "all decimal numbers: '" + Buf + "' for archive " "member header at offset " + Twine(Offset)); } - return Ret; + return Size; } -Expected ArchiveMemberHeader::getAccessMode() const { - unsigned Ret; - if (StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { +Expected BigArchiveMemberHeader::getSize() const { + uint64_t Size; + uint64_t NameLen; + if (StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Size) || + (Parent->kind() == Archive::K_XCOFF && StringRef(ArMemHdr->NameLen, + sizeof(ArMemHdr->NameLen)).rtrim(" ").getAsInteger(10, NameLen))) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.write_escaped(StringRef(ArMemHdr->Size, + sizeof(ArMemHdr->Size)).rtrim(" ")); OS.flush(); uint64_t Offset = reinterpret_cast(ArMemHdr) - Parent->getData().data(); + return malformedError("characters in size field in archive header are not " + "all decimal numbers: '" + Buf + "' for archive " + "member header at offset " + Twine(Offset)); + } + // First read: size is header + object size + name round to be even + Fixed-Length Header + magic + // header is added automatically + if (Parent->CurrentLocation == 0) + return Size + NameLen + NameLen%2 + sizeof(ArFixLenHdrType); + // Next read: size is header + object size + name round to be even + else + return Size + NameLen + NameLen%2; +} + +uint64_t ArchiveMemberHeader::getOffset() const { + uint64_t Offset = reinterpret_cast(ArMemHdr) - + Parent->getData().data(); + return Offset; +} + +uint64_t BigArchiveMemberHeader::getOffset() const { + uint64_t Offset = reinterpret_cast(ArMemHdr) - + Parent->getData().data(); + return Offset; +} + +// This gets the raw name from the ArMemHdr->AccessMode field. +StringRef ArchiveMemberHeader::getRawAccessMode() const { + return StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode)).rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawAccessMode() const { + return StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode)).rtrim(' '); +} + +Expected AbstractArchiveMemberHeader::getAccessMode() const { + unsigned Ret; + if (getRawAccessMode().getAsInteger(8, Ret)) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped(getRawAccessMode()); + OS.flush(); + uint64_t Offset = getOffset(); return malformedError("characters in AccessMode field in archive header " "are not all decimal numbers: '" + Buf + "' for the " "archive member header at offset " + Twine(Offset)); @@ -259,19 +372,24 @@ return static_cast(Ret); } +// This gets the raw name from the ArMemHdr->LastModified field. +StringRef ArchiveMemberHeader::getRawLastModified() const { + return StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified)).rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawLastModified() const { + return StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified)).rtrim(' '); +} + Expected> -ArchiveMemberHeader::getLastModified() const { +AbstractArchiveMemberHeader::getLastModified() const { unsigned Seconds; - if (StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(' ') - .getAsInteger(10, Seconds)) { + if (getRawLastModified().getAsInteger(10, Seconds)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.write_escaped(StringRef(getRawLastModified())); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in LastModified field in archive header " "are not all decimal numbers: '" + Buf + "' for the " "archive member header at offset " + Twine(Offset)); @@ -280,9 +398,18 @@ return sys::toTimePoint(Seconds); } -Expected ArchiveMemberHeader::getUID() const { +// This gets the raw name from the ArMemHdr->UID field. +StringRef ArchiveMemberHeader::getRawUID() const { + return StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawUID() const { + return StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); +} + +Expected AbstractArchiveMemberHeader::getUID() const { unsigned Ret; - StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); + StringRef User = getRawUID(); if (User.empty()) return 0; if (User.getAsInteger(10, Ret)) { @@ -290,8 +417,7 @@ raw_string_ostream OS(Buf); OS.write_escaped(User); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in UID field in archive header " "are not all decimal numbers: '" + Buf + "' for the " "archive member header at offset " + Twine(Offset)); @@ -299,9 +425,18 @@ return Ret; } -Expected ArchiveMemberHeader::getGID() const { +// This gets the raw name from the ArMemHdr->GID field. +StringRef ArchiveMemberHeader::getRawGID() const { + return StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawGID() const { + return StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); +} + +Expected AbstractArchiveMemberHeader::getGID() const { unsigned Ret; - StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); + StringRef Group = getRawGID(); if (Group.empty()) return 0; if (Group.getAsInteger(10, Ret)) { @@ -309,8 +444,7 @@ raw_string_ostream OS(Buf); OS.write_escaped(Group); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in GID field in archive header " "are not all decimal numbers: '" + Buf + "' for the " "archive member header at offset " + Twine(Offset)); @@ -320,19 +454,30 @@ Archive::Child::Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile) - : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr), - Data(Data), StartOfFile(StartOfFile) { + : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { + if (Parent->kind() != K_XCOFF) { + Header = new ArchiveMemberHeader(Parent, Data.data(), Data.size(), nullptr); + } else { + Header = new BigArchiveMemberHeader(Parent, Data.data(), Data.size(), nullptr); + } } Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) - : Parent(Parent), - Header(Parent, Start, - Parent - ? Parent->getData().size() - (Start - Parent->getData().data()) - : 0, Err) { + : Parent(Parent) { + if (!Start) return; + if (Parent->kind() != K_XCOFF) { + Header = new ArchiveMemberHeader(Parent, Start, + Parent + ? Parent->getData().size() - (Start - Parent->getData().data()) : 0, Err); + } else { + Header = new BigArchiveMemberHeader(Parent, Start, + Parent + ? Parent->getData().size() - (Start - Parent->getData().data()) : 0, Err); + } + // If we are pointed to real data, Start is not a nullptr, then there must be // a non-null Err pointer available to report malformed data on. Only in // the case sentinel value is being constructed is Err is permitted to be a @@ -346,7 +491,7 @@ if (*Err) return; - uint64_t Size = Header.getSizeOf(); + uint64_t Size = Header->getSizeOf(); Data = StringRef(Start, Size); Expected isThinOrErr = isThinMember(); if (!isThinOrErr) { @@ -365,7 +510,7 @@ } // Setup StartOfFile and PaddingBytes. - StartOfFile = Header.getSizeOf(); + StartOfFile = Header->getSizeOf(); // Don't include attached name. Expected NameOrErr = getRawName(); if (!NameOrErr){ @@ -373,6 +518,17 @@ return; } StringRef Name = NameOrErr.get(); + + if (Parent->kind() == Archive::K_XCOFF && Parent->fixLengthHeader) { + // Add name to found the real start + // Add also Fixed-Length Header in the first read. + StartOfFile += Name.size() + Name.size()%2; + StartOfFile += Header->getFixSizeOf(); + } else if (Parent->kind() == Archive::K_XCOFF) { + // Add name to found the real start + StartOfFile += Name.size() + Name.size()%2; + } + if (Name.startswith("#1/")) { uint64_t NameSize; if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) { @@ -393,16 +549,16 @@ Expected Archive::Child::getSize() const { if (Parent->IsThin) - return Header.getSize(); + return Header->getSize(); return Data.size() - StartOfFile; } Expected Archive::Child::getRawSize() const { - return Header.getSize(); + return Header->getSize(); } Expected Archive::Child::isThinMember() const { - Expected NameOrErr = Header.getRawName(); + Expected NameOrErr = Header->getRawName(); if (!NameOrErr) return NameOrErr.takeError(); StringRef Name = NameOrErr.get(); @@ -456,13 +612,20 @@ ++SpaceToSkip; const char *NextLoc = Data.data() + SpaceToSkip; + // Update current + CurrentLocation += SpaceToSkip; + // On AIX, stop on Member table // Check to see if this is at the end of the archive. - if (NextLoc == Parent->Data.getBufferEnd()) + if (NextLoc == Parent->Data.getBufferEnd() || + (Parent->kind() == K_XCOFF && CurrentLocation + MAGIC_LEN == Parent->Length) ) + { return Child(nullptr, nullptr, nullptr); + } // Check to see if this is past the end of the archive. - if (NextLoc > Parent->Data.getBufferEnd()) { + if ((NextLoc > Parent->Data.getBufferEnd()) || + (Parent->kind() == K_XCOFF && Parent->CurrentLocation + MAGIC_LEN > Parent->Length)) { std::string Msg("offset to next archive member past the end of the archive " "after member "); Expected NameOrErr = getName(); @@ -493,7 +656,7 @@ if (!RawSizeOrErr) return RawSizeOrErr.takeError(); uint64_t RawSize = RawSizeOrErr.get(); - Expected NameOrErr = Header.getName(Header.getSizeOf() + RawSize); + Expected NameOrErr = Header->getName(Header->getSizeOf() + RawSize); if (!NameOrErr) return NameOrErr.takeError(); StringRef Name = NameOrErr.get(); @@ -545,6 +708,8 @@ IsThin = true; } else if (Buffer.startswith(Magic)) { IsThin = false; + } else if (Buffer.startswith(BigMagic)) { + IsThin = false; } else { Err = make_error("file too small to be an archive", object_error::invalid_file_type); @@ -556,7 +721,10 @@ // archive which is the same in all formats. So claiming it to be gnu to is // fine if not totally correct before we look for a string table or table of // contents. - Format = K_GNU; + if (Buffer.startswith(BigMagic)) { + Format = K_XCOFF; + } else + Format = K_GNU; // Get the special members. child_iterator I = child_begin(Err, false); @@ -586,6 +754,17 @@ } StringRef Name = NameOrErr.get(); + // AIX big archive is totally different that all other. + if (Buffer.startswith(BigMagic)) { + Format = K_XCOFF; + // Length of archive (all object file + header) + // is offset to member table, located at 8->27. + Buffer.substr(8, 20).rtrim(" ").getAsInteger(10, Length); + setFirstRegular(*C); + Err = Error::success(); + return; + } + // Below is the pattern that is used to figure out the archive format // GNU archive format // First member : / (may exist, if it exists, points to the symbol table ) @@ -797,6 +976,9 @@ const char *Offsets = Buf; if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) Offsets += sizeof(uint64_t); + else if (Parent->kind() == K_XCOFF) + Offsets += 20; + // Each offset is 20 bytes long else Offsets += sizeof(uint32_t); uint64_t Offset = 0; @@ -820,6 +1002,8 @@ // the archive of the member that defines the symbol. Which is what // is needed here. Offset = read64le(Offsets + SymbolIndex * 16 + 8); + } else if (Parent->kind() == K_XCOFF) { + Offset = read64be(Offsets + (SymbolIndex + 1) * 20); } else { // Skip offsets. uint32_t MemberCount = read32le(Buf); @@ -935,6 +1119,9 @@ // Skip the byte count of the string table. buf += sizeof(uint64_t); buf += ran_strx; + } else if (kind() == K_XCOFF) { + uint64_t symbol_count = read64be(buf); + buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); } else { uint32_t member_count = 0; uint32_t symbol_count = 0; @@ -963,6 +1150,8 @@ return read32le(buf) / 8; if (kind() == K_DARWIN64) return read64le(buf) / 16; + if (kind() == K_XCOFF) + return read64be(buf); uint32_t member_count = 0; member_count = read32le(buf); buf += 4 + (member_count * 4); // Skip offsets.