Index: llvm/include/llvm/Object/Archive.h =================================================================== --- llvm/include/llvm/Object/Archive.h +++ llvm/include/llvm/Object/Archive.h @@ -34,38 +34,60 @@ class Archive; -class ArchiveMemberHeader { +class AbstractArchiveMemberHeader { public: friend class Archive; - - ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, - uint64_t Size, Error *Err); - // ArchiveMemberHeader() = default; - + // clone() is used to create a new object identical to original. + virtual AbstractArchiveMemberHeader *clone() const = 0; + virtual ~AbstractArchiveMemberHeader(){}; /// Get the name without looking up long names. - Expected getRawName() const; + virtual Expected getRawName() const = 0; /// Get the name looking up long names. - Expected getName(uint64_t Size) const; + virtual Expected getName(uint64_t Size) const = 0; - Expected getSize() const; + virtual Expected getSize() const = 0; + // Raw access and helper getters + virtual uint64_t getOffset() const = 0; + virtual StringRef getRawAccessMode() const = 0; + virtual StringRef getRawLastModified() const = 0; + virtual StringRef getRawUID() const = 0; + virtual StringRef getRawGID() const = 0; + + // Non-Raw getters Expected getAccessMode() const; Expected> getLastModified() const; - - StringRef getRawLastModified() const { - return StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(' '); - } - Expected getUID() const; Expected getGID() const; // This returns the size of the private struct ArMemHdrType - uint64_t getSizeOf() const { - return sizeof(ArMemHdrType); + virtual uint64_t getSizeOf() const = 0; + // This returns the size of the private struct ArFixLenHdrType + virtual uint64_t getFixSizeOf() const = 0; +}; + +class ArchiveMemberHeader : public AbstractArchiveMemberHeader { +public: + ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, + uint64_t Size, Error *Err); + ArchiveMemberHeader *clone() const override 
{ + return new ArchiveMemberHeader(*this); } + Expected getRawName() const override; + Expected getName(uint64_t Size) const override; + Expected getSize() const override; + + uint64_t getOffset() const override; + StringRef getRawAccessMode() const override; + StringRef getRawLastModified() const override; + StringRef getRawUID() const override; + StringRef getRawGID() const override; + + uint64_t getSizeOf() const override { return sizeof(ArMemHdrType); } + uint64_t getFixSizeOf() const override { return 0; } + private: struct ArMemHdrType { char Name[16]; @@ -76,8 +98,61 @@ char Size[10]; ///< Size of data, not including header or padding. char Terminator[2]; }; + ArMemHdrType const *ArMemHdr; Archive const *Parent; +}; + +class BigArchiveMemberHeader : public AbstractArchiveMemberHeader { +public: + BigArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr, + uint64_t Size, Error *Err); + BigArchiveMemberHeader *clone() const override { + return new BigArchiveMemberHeader(*this); + } + + Expected getRawName() const override; + Expected getName(uint64_t Size) const override; + Expected getSize() const override; + + uint64_t getOffset() const override; + StringRef getRawAccessMode() const override; + StringRef getRawLastModified() const override; + StringRef getRawUID() const override; + StringRef getRawGID() const override; + + uint64_t getSizeOf() const override { return sizeof(ArMemHdrType); } + uint64_t getFixSizeOf() const override { return sizeof(ArFixLenHdrType); } + +private: + // File Member Header + struct ArMemHdrType { + char Size[20]; // File member size in decimal + char NextOffset[20]; // Next member offset in decimal + char PrevOffset[20]; // Previous member offset in decimal + char LastModified[12]; + char UID[12]; + char GID[12]; + char AccessMode[12]; + char NameLen[4]; // File member name length in decimal + union { + char Name[2]; // Start of member name + char Terminator[2]; + }; + }; + + // Fixed-Length Header (without magic) + 
struct ArFixLenHdrType { + char MemOffset[20]; /*Offset to member table */ + char GlobSymOffset[20]; /*Offset to global symbol table */ + char GlobSym64Offset[20]; /*Offset global symbol table for 64-bit objects */ + char FirstArOffset[20]; /*Offset to first archive member */ + char LastArOffset[20]; /*Offset to last archive member */ + char FreeOffset[20]; /*Offset to first mem on free list */ + }; + ArMemHdrType const *ArMemHdr; + ArFixLenHdrType const *ArFixLenHdr; + Archive const *Parent; }; class Archive : public Binary { @@ -86,10 +161,10 @@ public: class Child { friend Archive; - friend ArchiveMemberHeader; + friend AbstractArchiveMemberHeader; const Archive *Parent; - ArchiveMemberHeader Header; + AbstractArchiveMemberHeader *Header; /// Includes header but not padding byte. StringRef Data; /// Offset from Data to the start of the file. @@ -100,8 +175,60 @@ public: Child(const Archive *Parent, const char *Start, Error *Err); Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile); + Child(const Child &C) + : Parent(C.Parent), Data(C.Data), StartOfFile(C.StartOfFile) { + Header = nullptr; + if (C.Header) + Header = C.Header->clone(); + } + Child(Child &&C) { + Parent = C.Parent; + C.Parent = nullptr; + Header = C.Header; + C.Header = nullptr; + Data = C.Data; + StartOfFile = C.StartOfFile; + } + ~Child() { + if (Header) + delete Header; + } + + Child &operator=(Child &&C) noexcept { + if (&C == this) + return *this; + Parent = C.Parent; + C.Parent = nullptr; + + if (Header) + delete Header; + Header = C.Header; + C.Header = nullptr; + + Data = C.Data; + StartOfFile = C.StartOfFile; - bool operator ==(const Child &other) const { + return *this; + } + + Child &operator=(const Child &C) { + if (&C == this) + return *this; + + if (Header) + delete Header; + Header = nullptr; + + Parent = C.Parent; + if (C.Header) + Header = C.Header->clone(); + Data = C.Data; + StartOfFile = C.StartOfFile; + + return *this; + } + + bool operator==(const Child &other) 
const { assert(!Parent || !other.Parent || Parent == other.Parent); return Data.begin() == other.Data.begin(); } @@ -111,21 +238,21 @@ Expected getName() const; Expected getFullName() const; - Expected getRawName() const { return Header.getRawName(); } + Expected getRawName() const { return Header->getRawName(); } Expected> getLastModified() const { - return Header.getLastModified(); + return Header->getLastModified(); } StringRef getRawLastModified() const { - return Header.getRawLastModified(); + return Header->getRawLastModified(); } - Expected getUID() const { return Header.getUID(); } - Expected getGID() const { return Header.getGID(); } + Expected getUID() const { return Header->getUID(); } + Expected getGID() const { return Header->getGID(); } Expected getAccessMode() const { - return Header.getAccessMode(); + return Header->getAccessMode(); } /// \return the size of the archive member without the header or padding. @@ -182,11 +309,9 @@ public: Symbol(const Archive *p, uint32_t symi, uint32_t stri) - : Parent(p) - , SymbolIndex(symi) - , StringIndex(stri) {} + : Parent(p), SymbolIndex(symi), StringIndex(stri) {} - bool operator ==(const Symbol &other) const { + bool operator==(const Symbol &other) const { return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex); } @@ -212,7 +337,7 @@ return !(*this == other); } - symbol_iterator& operator++() { // Preincrement + symbol_iterator &operator++() { // Preincrement symbol = symbol.getNext(); return *this; } @@ -224,14 +349,7 @@ /// Size field is 10 decimal digits long static const uint64_t MaxMemberSize = 9999999999; - enum Kind { - K_GNU, - K_GNU64, - K_BSD, - K_DARWIN, - K_DARWIN64, - K_COFF - }; + enum Kind { K_GNU, K_GNU64, K_BSD, K_DARWIN, K_DARWIN64, K_COFF, K_XCOFF }; Kind kind() const { return (Kind)Format; } bool isThin() const { return IsThin; } @@ -250,9 +368,7 @@ } // Cast methods. 
- static bool classof(Binary const *v) { - return v->isArchive(); - } + static bool classof(Binary const *v) { return v->isArchive(); } // check if a symbol is in the archive Expected> findSym(StringRef name) const; @@ -267,6 +383,15 @@ return std::move(ThinBuffers); } + // On Big Archive, the total length is needed + // because the end of the file is the member table and global symbol table. + uint32_t Length = 0; + // On Big Archive, all offsets are global offsets, + // so we need to remember the current position. NOTE(review): static, hence shared by all Archive objects -- confirm intended. + static uint32_t CurrentLocation; + // On Big Archive, the fixed-length header is treated differently. + static uint32_t fixLengthHeader; + private: StringRef SymbolTable; StringRef StringTable; Index: llvm/lib/Object/Archive.cpp =================================================================== --- llvm/lib/Object/Archive.cpp +++ llvm/lib/Object/Archive.cpp @@ -40,11 +40,17 @@ const char Magic[] = "!\n"; const char ThinMagic[] = "!\n"; +const char BigMagic[] = "\n"; + +// All magic strings are 8 characters long +#define MAGIC_LEN 8 + +uint32_t Archive::CurrentLocation = 0; +uint32_t Archive::fixLengthHeader = 2; void Archive::anchor() {} -static Error -malformedError(Twine Msg) { +static Error malformedError(Twine Msg) { std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; return make_error(std::move(StringMsg), object_error::parse_failed); @@ -53,12 +59,13 @@ ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, const char *RawHeaderPtr, uint64_t Size, Error *Err) - : Parent(Parent), - ArMemHdr(reinterpret_cast(RawHeaderPtr)) { + : Parent(Parent) { if (RawHeaderPtr == nullptr) return; ErrorAsOutParameter ErrAsOutParam(Err); + ArMemHdr = reinterpret_cast(RawHeaderPtr); + if (Size < sizeof(ArMemHdrType)) { if (Err) { std::string Msg("remaining size of archive too small for next archive " @@ -77,8 +84,8 @@ if (Err) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(StringRef(ArMemHdr->Terminator, - sizeof(ArMemHdr->Terminator))); + 
OS.write_escaped( + StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); OS.flush(); std::string Msg("terminator characters in archive member \"" + Buf + "\" not the correct \"`\\n\" values for the archive " @@ -95,6 +102,45 @@ } } +BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, + const char *RawHeaderPtr, + uint64_t Size, Error *Err) + : Parent(Parent) { + if (RawHeaderPtr == nullptr) + return; + ErrorAsOutParameter ErrAsOutParam(Err); + + // Begin of the archive: check if it is Fixed-Length Header. + if (Parent->fixLengthHeader) { + // Fixed-Length Header first + ArFixLenHdr = reinterpret_cast(RawHeaderPtr); + --Archive::fixLengthHeader; + // File-Member Header + const char *RawMemberHeaderPtr = RawHeaderPtr + sizeof(ArFixLenHdrType); + ArMemHdr = reinterpret_cast(RawMemberHeaderPtr); + } else { + // Only File-Member Header + ArMemHdr = reinterpret_cast(RawHeaderPtr); + } + + if (Size < sizeof(ArMemHdrType)) { + if (Err) { + std::string Msg("remaining size of archive too small for next archive " + "member header "); + Expected NameOrErr = getName(Size); + if (!NameOrErr) { + consumeError(NameOrErr.takeError()); + uint64_t Offset = RawHeaderPtr - Parent->getData().data(); + *Err = malformedError(Msg + "at offset " + Twine(Offset)); + } else + *Err = malformedError(Msg + "for " + NameOrErr.get()); + } + return; + } + + // Terminator is cosmetic only for big archive +} + // This gets the raw name from the ArMemHdr->Name field and checks that it is // valid for the kind of archive. If it is not valid it returns an Error. 
Expected ArchiveMemberHeader::getRawName() const { @@ -102,14 +148,14 @@ auto Kind = Parent->kind(); if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { if (ArMemHdr->Name[0] == ' ') { - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); return malformedError("name contains a leading space for archive member " - "header at offset " + Twine(Offset)); + "header at offset " + + Twine(Offset)); } EndCond = ' '; - } - else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') + } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') EndCond = ' '; else EndCond = '/'; @@ -123,6 +169,15 @@ } // This gets the name looking up long names. Size is the size of the archive +Expected BigArchiveMemberHeader::getRawName() const { + // Name is outside ArMemHdr, and there is no end character + // name length is in NameLen field. NOTE(review): NameLen is a char[4] with no terminator and is followed directly by Name, so strtol may keep parsing into Name -- confirm. + // The two first char of name are already in ArMemHdrType + // but unused terminator '`\n' is after the name. + StringRef::size_type end = strtol(ArMemHdr->NameLen, NULL, 10); + return StringRef(ArMemHdr->Name, end); +} + // member including the header, so the size of any name following the header // is checked to make sure it does not overflow. Expected ArchiveMemberHeader::getName(uint64_t Size) const { @@ -131,8 +186,8 @@ // archive header is truncated to produce an error message with the name. // Make sure the name field is not truncated. 
if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { - uint64_t ArchiveOffset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t ArchiveOffset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); return malformedError("archive header truncated before the name field " "for archive member header at offset " + Twine(ArchiveOffset)); @@ -158,21 +213,25 @@ raw_string_ostream OS(Buf); OS.write_escaped(Name.substr(1).rtrim(' ')); OS.flush(); - uint64_t ArchiveOffset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t ArchiveOffset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); return malformedError("long name offset characters after the '/' are " - "not all decimal numbers: '" + Buf + "' for " + "not all decimal numbers: '" + + Buf + + "' for " "archive member header at offset " + Twine(ArchiveOffset)); } // Verify it. if (StringOffset >= Parent->getStringTable().size()) { - uint64_t ArchiveOffset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); - return malformedError("long name offset " + Twine(StringOffset) + " past " + uint64_t ArchiveOffset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); + return malformedError("long name offset " + Twine(StringOffset) + + " past " "the end of the string table for archive member " - "header at offset " + Twine(ArchiveOffset)); + "header at offset " + + Twine(ArchiveOffset)); } // GNU long file names end with a "/\n". 
@@ -196,23 +255,26 @@ raw_string_ostream OS(Buf); OS.write_escaped(Name.substr(3).rtrim(' ')); OS.flush(); - uint64_t ArchiveOffset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t ArchiveOffset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); return malformedError("long name length characters after the #1/ are " - "not all decimal numbers: '" + Buf + "' for " + "not all decimal numbers: '" + + Buf + + "' for " "archive member header at offset " + Twine(ArchiveOffset)); } if (getSizeOf() + NameLength > Size) { - uint64_t ArchiveOffset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t ArchiveOffset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); return malformedError("long name length: " + Twine(NameLength) + " extends past the end of the member or archive " "for archive member header at offset " + Twine(ArchiveOffset)); } return StringRef(reinterpret_cast(ArMemHdr) + getSizeOf(), - NameLength).rtrim('\0'); + NameLength) + .rtrim('\0'); } // It is not a long name so trim the blanks at the end of the name. @@ -223,66 +285,156 @@ return Name.drop_back(1); } +Expected BigArchiveMemberHeader::getName(uint64_t Size) const { + // The raw name itself can be invalid. + Expected NameOrErr = getRawName(); + if (!NameOrErr) + return NameOrErr.takeError(); + StringRef Name = NameOrErr.get(); + + // Trim the blanks at the end of the name. 
+ return Name.rtrim(' '); +} + Expected ArchiveMemberHeader::getSize() const { - uint64_t Ret; - if (StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) { + uint64_t Size; + if (StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)) + .rtrim(" ") + .getAsInteger(10, Size)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(StringRef(ArMemHdr->Size, - sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.write_escaped( + StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)).rtrim(" ")); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); return malformedError("characters in size field in archive header are not " - "all decimal numbers: '" + Buf + "' for archive " - "member header at offset " + Twine(Offset)); + "all decimal numbers: '" + + Buf + + "' for archive " + "member header at offset " + + Twine(Offset)); } - return Ret; + return Size; +} + +Expected BigArchiveMemberHeader::getSize() const { + uint64_t Size; + uint64_t NameLen; + if (StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)) + .rtrim(" ") + .getAsInteger(10, Size) || + (Parent->kind() == Archive::K_XCOFF && + StringRef(ArMemHdr->NameLen, sizeof(ArMemHdr->NameLen)) + .rtrim(" ") + .getAsInteger(10, NameLen))) { + std::string Buf; + raw_string_ostream OS(Buf); + OS.write_escaped( + StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)).rtrim(" ")); + OS.flush(); + uint64_t Offset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); + return malformedError("characters in size field in archive header are not " + "all decimal numbers: '" + + Buf + + "' for archive " + "member header at offset " + + Twine(Offset)); + } + // First read: size is header + object size + name round to be even + + // Fixed-Length Header + magic header is added automatically + if (Parent->CurrentLocation == 0) + return Size + NameLen + NameLen % 2 + sizeof(ArFixLenHdrType); + // Next read: 
size is header + object size + name round to be even + else + return Size + NameLen + NameLen % 2; +} + +uint64_t ArchiveMemberHeader::getOffset() const { + uint64_t Offset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); + return Offset; +} + +uint64_t BigArchiveMemberHeader::getOffset() const { + uint64_t Offset = + reinterpret_cast(ArMemHdr) - Parent->getData().data(); + return Offset; +} + +// This gets the raw access mode from the ArMemHdr->AccessMode field. +StringRef ArchiveMemberHeader::getRawAccessMode() const { + return StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode)) + .rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawAccessMode() const { + return StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode)) + .rtrim(' '); } -Expected ArchiveMemberHeader::getAccessMode() const { +Expected AbstractArchiveMemberHeader::getAccessMode() const { unsigned Ret; - if (StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) { + if (getRawAccessMode().getAsInteger(8, Ret)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(StringRef(ArMemHdr->AccessMode, - sizeof(ArMemHdr->AccessMode)).rtrim(" ")); + OS.write_escaped(getRawAccessMode()); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in AccessMode field in archive header " - "are not all decimal numbers: '" + Buf + "' for the " - "archive member header at offset " + Twine(Offset)); + "are not all decimal numbers: '" + + Buf + + "' for the " + "archive member header at offset " + + Twine(Offset)); } return static_cast(Ret); } +// This gets ArMemHdr->LastModified field. 
+StringRef ArchiveMemberHeader::getRawLastModified() const { + return StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified)) + .rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawLastModified() const { + return StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified)) + .rtrim(' '); +} + Expected> -ArchiveMemberHeader::getLastModified() const { +AbstractArchiveMemberHeader::getLastModified() const { unsigned Seconds; - if (StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(' ') - .getAsInteger(10, Seconds)) { + if (getRawLastModified().getAsInteger(10, Seconds)) { std::string Buf; raw_string_ostream OS(Buf); - OS.write_escaped(StringRef(ArMemHdr->LastModified, - sizeof(ArMemHdr->LastModified)).rtrim(" ")); + OS.write_escaped(StringRef(getRawLastModified())); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in LastModified field in archive header " - "are not all decimal numbers: '" + Buf + "' for the " - "archive member header at offset " + Twine(Offset)); + "are not all decimal numbers: '" + + Buf + + "' for the " + "archive member header at offset " + + Twine(Offset)); } return sys::toTimePoint(Seconds); } -Expected ArchiveMemberHeader::getUID() const { +// This gets the raw UID from the ArMemHdr->UID field. 
+StringRef ArchiveMemberHeader::getRawUID() const { + return StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawUID() const { + return StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); +} + +Expected AbstractArchiveMemberHeader::getUID() const { unsigned Ret; - StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' '); + StringRef User = getRawUID(); if (User.empty()) return 0; if (User.getAsInteger(10, Ret)) { @@ -290,18 +442,29 @@ raw_string_ostream OS(Buf); OS.write_escaped(User); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in UID field in archive header " - "are not all decimal numbers: '" + Buf + "' for the " - "archive member header at offset " + Twine(Offset)); + "are not all decimal numbers: '" + + Buf + + "' for the " + "archive member header at offset " + + Twine(Offset)); } return Ret; } -Expected ArchiveMemberHeader::getGID() const { +// This gets the raw GID from the ArMemHdr->GID field. 
+StringRef ArchiveMemberHeader::getRawGID() const { + return StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); +} + +StringRef BigArchiveMemberHeader::getRawGID() const { + return StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); +} + +Expected AbstractArchiveMemberHeader::getGID() const { unsigned Ret; - StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' '); + StringRef Group = getRawGID(); if (Group.empty()) return 0; if (Group.getAsInteger(10, Ret)) { @@ -309,29 +472,52 @@ raw_string_ostream OS(Buf); OS.write_escaped(Group); OS.flush(); - uint64_t Offset = reinterpret_cast(ArMemHdr) - - Parent->getData().data(); + uint64_t Offset = getOffset(); return malformedError("characters in GID field in archive header " - "are not all decimal numbers: '" + Buf + "' for the " - "archive member header at offset " + Twine(Offset)); + "are not all decimal numbers: '" + + Buf + + "' for the " + "archive member header at offset " + + Twine(Offset)); } return Ret; } +// Child constructors Archive::Child::Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile) - : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr), - Data(Data), StartOfFile(StartOfFile) { + : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { + // Create the right concrete archive member as a function of Kind. + if (Parent->kind() != K_XCOFF) { + Header = new ArchiveMemberHeader(Parent, Data.data(), Data.size(), nullptr); + } else { + Header = + new BigArchiveMemberHeader(Parent, Data.data(), Data.size(), nullptr); + } } Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) - : Parent(Parent), - Header(Parent, Start, - Parent - ? Parent->getData().size() - (Start - Parent->getData().data()) - : 0, Err) { - if (!Start) + : Parent(Parent) { + + if (!Start) { + Header = nullptr; return; + } + + // Create the right concrete archive member as a function of Kind. 
+ if (Parent->kind() != K_XCOFF) { + Header = new ArchiveMemberHeader( + Parent, Start, + Parent ? Parent->getData().size() - (Start - Parent->getData().data()) + : 0, + Err); + } else { + Header = new BigArchiveMemberHeader( + Parent, Start, + Parent ? Parent->getData().size() - (Start - Parent->getData().data()) + : 0, + Err); + } // If we are pointed to real data, Start is not a nullptr, then there must be // a non-null Err pointer available to report malformed data on. Only in @@ -346,7 +532,7 @@ if (*Err) return; - uint64_t Size = Header.getSizeOf(); + uint64_t Size = Header->getSizeOf(); Data = StringRef(Start, Size); Expected isThinOrErr = isThinMember(); if (!isThinOrErr) { @@ -365,14 +551,25 @@ } // Setup StartOfFile and PaddingBytes. - StartOfFile = Header.getSizeOf(); + StartOfFile = Header->getSizeOf(); // Don't include attached name. Expected NameOrErr = getRawName(); - if (!NameOrErr){ + if (!NameOrErr) { *Err = NameOrErr.takeError(); return; } StringRef Name = NameOrErr.get(); + + if (Parent->kind() == Archive::K_XCOFF && Parent->fixLengthHeader) { + // Add name to found the real start + // Add also Fixed-Length Header in the first read. 
+ StartOfFile += Name.size() + Name.size() % 2; + StartOfFile += Header->getFixSizeOf(); + } else if (Parent->kind() == Archive::K_XCOFF) { + // Add name to found the real start + StartOfFile += Name.size() + Name.size() % 2; + } + if (Name.startswith("#1/")) { uint64_t NameSize; if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) { @@ -382,7 +579,9 @@ OS.flush(); uint64_t Offset = Start - Parent->getData().data(); *Err = malformedError("long name length characters after the #1/ are " - "not all decimal numbers: '" + Buf + "' for " + "not all decimal numbers: '" + + Buf + + "' for " "archive member header at offset " + Twine(Offset)); return; @@ -393,16 +592,16 @@ Expected Archive::Child::getSize() const { if (Parent->IsThin) - return Header.getSize(); + return Header->getSize(); return Data.size() - StartOfFile; } Expected Archive::Child::getRawSize() const { - return Header.getSize(); + return Header->getSize(); } Expected Archive::Child::isThinMember() const { - Expected NameOrErr = Header.getRawName(); + Expected NameOrErr = Header->getRawName(); if (!NameOrErr) return NameOrErr.takeError(); StringRef Name = NameOrErr.get(); @@ -456,13 +655,22 @@ ++SpaceToSkip; const char *NextLoc = Data.data() + SpaceToSkip; + // Update current + CurrentLocation += SpaceToSkip; // Check to see if this is at the end of the archive. - if (NextLoc == Parent->Data.getBufferEnd()) + // With BigArchive, stop on Member table. + if (NextLoc == Parent->Data.getBufferEnd() || + (Parent->kind() == K_XCOFF && + CurrentLocation + MAGIC_LEN == Parent->Length)) { return Child(nullptr, nullptr, nullptr); + } // Check to see if this is past the end of the archive. - if (NextLoc > Parent->Data.getBufferEnd()) { + // With BigArchive, stop on Member table. 
+ if ((NextLoc > Parent->Data.getBufferEnd()) || + (Parent->kind() == K_XCOFF && + Parent->CurrentLocation + MAGIC_LEN > Parent->Length)) { std::string Msg("offset to next archive member past the end of the archive " "after member "); Expected NameOrErr = getName(); @@ -493,7 +701,8 @@ if (!RawSizeOrErr) return RawSizeOrErr.takeError(); uint64_t RawSize = RawSizeOrErr.get(); - Expected NameOrErr = Header.getName(Header.getSizeOf() + RawSize); + Expected NameOrErr = + Header->getName(Header->getSizeOf() + RawSize); if (!NameOrErr) return NameOrErr.takeError(); StringRef Name = NameOrErr.get(); @@ -545,6 +754,8 @@ IsThin = true; } else if (Buffer.startswith(Magic)) { IsThin = false; + } else if (Buffer.startswith(BigMagic)) { + IsThin = false; } else { Err = make_error("file too small to be an archive", object_error::invalid_file_type); @@ -556,7 +767,10 @@ // archive which is the same in all formats. So claiming it to be gnu to is // fine if not totally correct before we look for a string table or table of // contents. - Format = K_GNU; + if (Buffer.startswith(BigMagic)) { + Format = K_XCOFF; + } else + Format = K_GNU; // Get the special members. child_iterator I = child_begin(Err, false); @@ -586,6 +800,17 @@ } StringRef Name = NameOrErr.get(); + // The AIX Big Archive format is totally different from all the others. + if (Buffer.startswith(BigMagic)) { + Format = K_XCOFF; + // Length of archive (all object files and headers) + // is the offset to the member table, located at 8->27. 
+ Buffer.substr(8, 20).rtrim(" ").getAsInteger(10, Length); + setFirstRegular(*C); + Err = Error::success(); + return; + } + // Below is the pattern that is used to figure out the archive format // GNU archive format // First member : / (may exist, if it exists, points to the symbol table ) @@ -646,8 +871,7 @@ SymbolTable = BufOrErr.get(); if (Increment()) return; - } - else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { + } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { Format = K_DARWIN64; // We know that the symbol table is not an external file, but we still // must check any Expected<> return value. @@ -797,6 +1021,9 @@ const char *Offsets = Buf; if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) Offsets += sizeof(uint64_t); + else if (Parent->kind() == K_XCOFF) + Offsets += 20; + // Each offset is 20 bytes long else Offsets += sizeof(uint32_t); uint64_t Offset = 0; @@ -820,6 +1047,8 @@ // the archive of the member that defines the symbol. Which is what // is needed here. Offset = read64le(Offsets + SymbolIndex * 16 + 8); + } else if (Parent->kind() == K_XCOFF) { + Offset = read64be(Offsets + (SymbolIndex + 1) * 20); } else { // Skip offsets. uint32_t MemberCount = read32le(Buf); @@ -935,6 +1164,9 @@ // Skip the byte count of the string table. buf += sizeof(uint64_t); buf += ran_strx; + } else if (kind() == K_XCOFF) { + uint64_t symbol_count = read64be(buf); + buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); } else { uint32_t member_count = 0; uint32_t symbol_count = 0; @@ -963,6 +1195,8 @@ return read32le(buf) / 8; if (kind() == K_DARWIN64) return read64le(buf) / 16; + if (kind() == K_XCOFF) + return read64be(buf); uint32_t member_count = 0; member_count = read32le(buf); buf += 4 + (member_count * 4); // Skip offsets. 
Index: llvm/test/Object/archive-big-read.test =================================================================== --- /dev/null +++ llvm/test/Object/archive-big-read.test @@ -0,0 +1,12 @@ +RUN: cd %p + +Test reading an archive created by AIX ar (Big Archive) +RUN: env TZ=GMT llvm-ar tv Inputs/Big.a | FileCheck %s -strict-whitespace +CHECK: rw-r--r-- 0/0 8 Apr 21 14:12 2021 evenlen +CHECK-NEXT: rw-r--r-- 0/0 7 Apr 21 14:12 2021 oddlen +CHECK-NEXT: rw-r--r-- 0/0 258 Apr 21 14:15 2021 empty.o + +Test extraction of a file +RUN: llvm-ar p Inputs/Big.a evenlen | FileCheck %s -check-prefix=EVENLEN +EVENLEN: evenlen +