Index: llvm/include/llvm/Object/ELFObjectFile.h =================================================================== --- llvm/include/llvm/Object/ELFObjectFile.h +++ llvm/include/llvm/Object/ELFObjectFile.h @@ -394,7 +394,7 @@ const Elf_Rel *getRel(DataRefImpl Rel) const; const Elf_Rela *getRela(DataRefImpl Rela) const; - const Elf_Sym *getSymbol(DataRefImpl Sym) const { + virtual const Elf_Sym *getSymbol(DataRefImpl Sym) const { auto Ret = EF.template getEntry(Sym.d.a, Sym.d.b); if (!Ret) report_fatal_error(errorToErrorCode(Ret.takeError()).message()); @@ -408,8 +408,8 @@ basic_symbol_iterator symbol_begin() const override; basic_symbol_iterator symbol_end() const override; - elf_symbol_iterator dynamic_symbol_begin() const; - elf_symbol_iterator dynamic_symbol_end() const; + virtual elf_symbol_iterator dynamic_symbol_begin() const; + virtual elf_symbol_iterator dynamic_symbol_end() const; section_iterator section_begin() const override; section_iterator section_end() const override; Index: llvm/include/llvm/Object/MutableELFObject.h =================================================================== --- llvm/include/llvm/Object/MutableELFObject.h +++ llvm/include/llvm/Object/MutableELFObject.h @@ -40,6 +40,18 @@ operator const Elf_Shdr &() const { return Header; } }; +template struct MutableELFSymbol { + using Elf_Sym = Elf_Sym_Impl; + + Elf_Sym Header; + std::string Name; + + MutableELFSymbol(const Elf_Sym &Header, StringRef Name) + : Header(Header), Name(Name) {} + + operator const Elf_Sym &() const { return Header; } +}; + template class MutableELFObject : public ELFObjectFile { /// This class is used for a 'copy on write' effect with tables in an ELF /// object file. @@ -52,14 +64,20 @@ /// The table keeps a list of mappings, these mappings can have one of two /// states either Original or New. In the case of Original the index /// associated with the mapping is into the original table in the file. 
For - /// a New mapping, the index is into the NewValues vector. This design allows - /// fewer copies to be made than there would otherwise need to be, entries - /// with no modifications never get copied and the only overhead for those is - /// an index. Entries which get modified can have richer types during program - /// executation than are allowed by the object file standard. + /// a New mapping, the index is into the NewValues vector. A Removed mapping + /// means that an entry has been removed and should be skipped over when + /// iterating over the MutableTable. Notably removed values stay in the table + /// to preserve the index of all other entries; similarly, insertion is + /// not supported, only add, the equivalent of push_back. + /// + /// The design of MutableTable allows fewer copies to be made than there + /// would otherwise need to be, entries with no modifications never get + /// copied and the only overhead for those is an index. Entries which get + /// modified can have richer types during program execution than are + /// allowed by the object file standard. template class MutableTable { struct MappingType { - enum MappedType { Original, New }; + enum MappedType { Original, New, Removed }; uint64_t Index; MappedType Type; @@ -88,11 +106,40 @@ /// operator OrigType to make the proper conversion. const OrigType &operator[](uint64_t Index) const { assert(Index < Mappings.size() && "Out of bounds"); + assert(Mappings[Index].Type != MappingType::Removed && + "Entry was removed"); if (Mappings[Index].Type == MappingType::New) return static_cast(NewValues[Mappings[Index]]); return OriginalValues[Mappings[Index]]; } + /// Returns the next non-removed index after CurrentIndex; operator[] does not + /// skip entries itself so indexes stay stable. The default of UINT64_MAX + /// wraps around to 0 so that the first element is considered.
+ uint64_t getNextIndex( + uint64_t CurrentIndex = std::numeric_limits::max()) const { + /// Bounds are checked first so the end index is returned without a read. + while (++CurrentIndex < Mappings.size() && + Mappings[CurrentIndex].Type == MappingType::Removed) + ; + return CurrentIndex; + } + + uint64_t getRelativeIndex(uint64_t Index) const { + uint64_t Ret = 0; + for (uint64_t I = 0; I < Index; ++I) + if (Mappings[I].Type != MappingType::Removed) + ++Ret; + return Ret; + } + + uint64_t getFirstNotRemoved() const { + for (uint64_t I = 0;; ++I) + if (I == end() || Mappings[I].Type != MappingType::Removed) + return I; + llvm_unreachable(""); + } + /// Get the OrigType at index Index. This method ignores any changes made /// and always returns the OrigType from its original state at its original /// index. @@ -101,6 +148,18 @@ return OriginalValues[Index]; } + /// Remove the entry at index Index. + void remove(uint64_t Index) { + assert(Index < Mappings.size() && "Out of bounds"); + Mappings[Index].Type = MappingType::Removed; + } + + /// Adds a NewType to the back of the list. + void add(NewType &&New) { + NewValues.push_back(std::move(New)); + Mappings.emplace_back(NewValues.size() - 1, MappingType::New); + } + /// If the entry at index Index has already been made mutable, this returns /// a reference to that. Otherwise, this replaces the current entry at the /// specified index with a NewType constructued with Arguments. @@ -118,19 +177,35 @@ /// has had makeMutable called on it. Otherwise this method returns nullptr. const NewType *getConstIfNew(uint64_t Index) const { assert(Index < Mappings.size() && "Out of bounds"); + assert(Mappings[Index].Type != MappingType::Removed); return Mappings[Index].Type == MappingType::New ? &NewValues[Mappings[Index]] : nullptr; } + /// Returns one past the index of the last mapping; this differs from size + /// because size returns the number of valid (non removed) entries.
+ size_t end() const { return Mappings.size(); } + /// Return the number of elements in the table. - size_t size() const { return Mappings.size(); } + size_t size() const { + return llvm::count_if(Mappings, [](const MappingType &Mapping) { + return Mapping.Type != MappingType::Removed; + }); + } + + size_t originalSize() const { return OriginalValues.size(); } }; using Elf_Shdr = Elf_Shdr_Impl; using Elf_Ehdr = Elf_Ehdr_Impl; + using Elf_Sym = Elf_Sym_Impl; + + using MutableSymbolTable = MutableTable>; MutableTable> Sections; + MutableSymbolTable Symbols; + MutableSymbolTable DynSymbols; const Elf_Ehdr &getHeader() const { return *reinterpret_cast(this->base()); @@ -153,25 +228,86 @@ Expected> getSectionContents(DataRefImpl Sec) const override; - static DataRefImpl toDataRef(uintptr_t Ptr) { - DataRefImpl Ref; - Ref.p = Ptr; - return Ref; + void moveSymbolNext(DataRefImpl &Sym) const override; + Expected getSymbolName(DataRefImpl Sym) const override; + ArrayRef findSymbolTable(uint64_t ShType) const; + uint32_t findSectionOfType(uint64_t ShType) const; + + const MutableSymbolTable &getWhichTable(DataRefImpl Sym) const { + return Sections.getOriginal(Sym.d.a).sh_type == ELF::SHT_SYMTAB + ?
Symbols + : DynSymbols; } + static DataRefImpl toDataRef(uintptr_t Ptr); + static DataRefImpl toDataRef(uint32_t A, uint32_t B); + public: explicit MutableELFObject(ELFObjectFile &&B) : ELFObjectFile(std::move(B)), Sections(ArrayRef(reinterpret_cast( this->base() + getHeader().e_shoff), - getHeader().e_shnum)) {} + getHeader().e_shnum)), + Symbols(findSymbolTable(ELF::SHT_SYMTAB)), + DynSymbols(findSymbolTable(ELF::SHT_DYNSYM)) {} section_iterator section_begin() const override { return section_iterator(SectionRef(toDataRef(0), this)); } section_iterator section_end() const override { - return section_iterator(SectionRef(toDataRef(Sections.size()), this)); + return section_iterator(SectionRef(toDataRef(Sections.end()), this)); + } + + basic_symbol_iterator symbol_begin() const override { + return basic_symbol_iterator( + SymbolRef(toDataRef(findSectionOfType(ELF::SHT_SYMTAB), + Symbols.getFirstNotRemoved()), + this)); + } + + basic_symbol_iterator symbol_end() const override { + return basic_symbol_iterator(SymbolRef( + toDataRef(findSectionOfType(ELF::SHT_SYMTAB), Symbols.end()), this)); + } + + elf_symbol_iterator dynamic_symbol_begin() const override { + return basic_symbol_iterator( + SymbolRef(toDataRef(findSectionOfType(ELF::SHT_DYNSYM), + DynSymbols.getFirstNotRemoved()), + this)); + } + + elf_symbol_iterator dynamic_symbol_end() const override { + return basic_symbol_iterator(SymbolRef( + toDataRef(findSectionOfType(ELF::SHT_DYNSYM), DynSymbols.end()), this)); + } + + /// Returns a mutable reference to the symbol referred to by Sym.
+ Expected &> getMutableSymbol(SymbolRef Sym) { + Expected Name = getSymbolName(Sym.getRawDataRefImpl()); + if (!Name) + return Name.takeError(); + return Symbols.makeMutable(Sym.getRawDataRefImpl().d.b, + Symbols[Sym.getRawDataRefImpl().d.b], *Name); + } + + Expected &> getMutableSymbol(symbol_iterator Sym) { + return getMutableSymbol(*Sym); + } + + /// Returns a mutable reference to the dynamic symbol referred to by Sym. + Expected &> getDynMutableSymbol(SymbolRef Sym) { + Expected Name = getSymbolName(Sym.getRawDataRefImpl()); + if (!Name) + return Name.takeError(); + return DynSymbols.makeMutable(Sym.getRawDataRefImpl().d.b, + DynSymbols[Sym.getRawDataRefImpl().d.b], + *Name); + } + + Expected &> getMutableDynSymbol(symbol_iterator Sym) { + return getDynMutableSymbol(*Sym); } /// Returns a mutable reference to the section pointed to by Sec. A possible @@ -185,13 +321,7 @@ /// MutSecOrErr->Header.sh_addrallgin = 1; /// } /// @endcode - Expected &> getMutableSection(SectionRef Sec) { - const Elf_Shdr_Impl &Header = Sections[Sec.getRawDataRefImpl().p]; - Expected Name = getSectionName(Sec.getRawDataRefImpl()); - if (!Name) - return Name.takeError(); - return Sections.makeMutable(Sec.getRawDataRefImpl().p, Header, *Name, this); - } + Expected &> getMutableSection(SectionRef Sec); /// Returns a mutable reference to the section pointed to by the /// section_iterator. It is usually more ergonomic to use the overload @@ -199,16 +329,28 @@ Expected &> getMutableSection(section_iterator Sec) { return getMutableSection(*Sec); } + + /// Removes Sym, which must refer to an entry of the SHT_SYMTAB table. + void removeSymbol(SymbolRef Sym) { + assert(Sections.getOriginal(Sym.getRawDataRefImpl().d.a).sh_type == + ELF::SHT_SYMTAB && + "Not pointing to symbol table"); + Symbols.remove(Sym.getRawDataRefImpl().d.b); + } + + /// Removes the symbol that the iterator Sym points to.
+ void removeSymbol(symbol_iterator Sym) { removeSymbol(*Sym); } + }; template void MutableELFObject::moveSectionNext(DataRefImpl &Sec) const { - ++Sec.p; + Sec.p = Sections.getNextIndex(Sec.p); } template uint64_t MutableELFObject::getSectionIndex(DataRefImpl Sec) const { - return Sec.p; + return Sections.getRelativeIndex(Sec.p); } template @@ -227,6 +369,69 @@ return ELFObjectFile::getSectionContents(Sec); } +template +void MutableELFObject::moveSymbolNext(DataRefImpl &Sym) const { + Sym.d.b = getWhichTable(Sym).getNextIndex(Sym.d.b); +} + +template +Expected +MutableELFObject::getSymbolName(DataRefImpl Sym) const { + const MutableSymbolTable &SymbolTable = getWhichTable(Sym); + if (const MutableELFSymbol *SymOrNull = + SymbolTable.getConstIfNew(Sym.d.b)) + return SymOrNull->Name; + return ELFObjectFile::getSymbolName(Sym); +} + +template +ArrayRef> +MutableELFObject::findSymbolTable(uint64_t ShType) const { + assert((ShType == ELF::SHT_SYMTAB || ShType == ELF::SHT_DYNSYM) && + "Not a symbol table type"); + for (const auto &Sec : this->sections()) + if (ELFSectionRef(Sec).getType() == ShType) + return ArrayRef( + reinterpret_cast(this->base() + + ELFSectionRef(Sec).getOffset()), + Sec.getSize() / sizeof(Elf_Sym)); + + return {}; +} + +template +uint32_t MutableELFObject::findSectionOfType(uint64_t ShType) const { + for (uint32_t I = 0; I < Sections.originalSize(); ++I) + if (Sections.getOriginal(I).sh_type == ShType) + return I; + return 0; +} + +template +DataRefImpl MutableELFObject::toDataRef(uintptr_t Ptr) { + DataRefImpl Ref; + Ref.p = Ptr; + return Ref; +} + +template +DataRefImpl MutableELFObject::toDataRef(uint32_t A, uint32_t B) { + DataRefImpl Ref; + Ref.d.a = A; + Ref.d.b = B; + return Ref; +} + +template +Expected &> +MutableELFObject::getMutableSection(SectionRef Sec) { + const Elf_Shdr_Impl &Header = Sections[Sec.getRawDataRefImpl().p]; + Expected Name = getSectionName(Sec.getRawDataRefImpl()); + if (!Name) + return Name.takeError(); + return
Sections.makeMutable(Sec.getRawDataRefImpl().p, Header, *Name, this); +} + } // namespace object } // namespace llvm Index: llvm/unittests/Object/MutableELFObjectTest.cpp =================================================================== --- llvm/unittests/Object/MutableELFObjectTest.cpp +++ llvm/unittests/Object/MutableELFObjectTest.cpp @@ -82,10 +82,8 @@ // Copy every section header but make no changes. SectionRefs now point to // section headers outside of the file's mapping. - for (auto Iter = MutableObject.section_begin(), - End = MutableObject.section_end(); - Iter != End; ++Iter) { - auto Expect = MutableObject.getMutableSection(Iter); + for (const SectionRef &Sec : MutableObject.sections()) { + auto Expect = MutableObject.getMutableSection(Sec); if (!Expect) consumeError(Expect.takeError()); } @@ -214,3 +212,231 @@ std::distance(MutableObject.section_begin(), MutableObject.section_end()); EXPECT_EQ(NewNumSections, NumSections); } + +// Test basic public methods on symbols. +TEST(MutableELFObject, BasicSymbol) { + SmallString<0> Storage; + Expected> ErrOrObj = yaml2ObjectFile(Storage, R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Symbols: + - Name: test + Index: SHN_ABS + Value: 0x1234 + Binding: STB_LOCAL + - Name: second + Index: SHN_ABS + Value: 1 + Binding: STB_GLOBAL)"); + + ASSERT_THAT_EXPECTED(ErrOrObj, Succeeded()); + auto *ELFObjFile = dyn_cast>(ErrOrObj->get()); + ASSERT_TRUE(ELFObjFile); + MutableELFObject MutableObject(std::move(*ELFObjFile)); + + auto TestSym = symbol_iterator(++MutableObject.symbol_begin()); + EXPECT_EQ(TestSym->getValue(), 0x1234U); + EXPECT_FALSE(TestSym->getFlags() & SymbolRef::SF_Global); + auto NameOrErr = TestSym->getName(); + ASSERT_THAT_EXPECTED(NameOrErr, Succeeded()); + EXPECT_EQ(*NameOrErr, "test"); + + auto SecondSym = symbol_iterator(++TestSym); + EXPECT_EQ(SecondSym->getValue(), 1U); + EXPECT_TRUE(SecondSym->getFlags() & SymbolRef::SF_Global); + NameOrErr 
= SecondSym->getName(); + ASSERT_THAT_EXPECTED(NameOrErr, Succeeded()); + EXPECT_EQ(*NameOrErr, "second"); +} + +// Test that public methods on SymbolRef behave the same for +// MutableELFObjectFile and ELFObjectFile. +TEST(MutableELFObject, SameAsELFObjectFile) { + StringRef Yaml = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +DynamicSymbols: + - Name: test + Index: SHN_ABS + Value: 0x1234 + Binding: STB_GLOBAL + - Name: second + Index: SHN_ABS + Value: 0 + Binding: STB_GLOBAL)"; + + SmallString<0> Storage; + raw_svector_ostream OS(Storage); + yaml::Input Input(Yaml); + ASSERT_THAT_ERROR(convertYAML(Input, OS), Succeeded()); + + Expected> ObjOrErr = + ObjectFile::createObjectFile(MemoryBufferRef(OS.str(), "YamlObject")); + ASSERT_THAT_EXPECTED(ObjOrErr, Succeeded()); + auto *ELFObjFile = dyn_cast>(ObjOrErr->get()); + ASSERT_TRUE(ELFObjFile); + const ObjectFile &ObjFile = *ELFObjFile; + + Expected> MutObjOrErr = + ObjectFile::createObjectFile(MemoryBufferRef(OS.str(), "YamlObject")); + ASSERT_THAT_EXPECTED(MutObjOrErr, Succeeded()); + ELFObjFile = dyn_cast>(MutObjOrErr->get()); + ASSERT_TRUE(ELFObjFile); + MutableELFObject MutableObject(std::move(*ELFObjFile)); + + auto TestSymbols = [](SymbolRef ObjFile, SymbolRef MutObj) { + EXPECT_EQ(ObjFile.getValue(), MutObj.getValue()); + EXPECT_EQ(ObjFile.getAlignment(), MutObj.getAlignment()); + EXPECT_EQ(ObjFile.getCommonSize(), MutObj.getCommonSize()); + EXPECT_EQ(ObjFile.getFlags(), MutObj.getFlags()); + + auto ObjNameOrErr = ObjFile.getName(); + ASSERT_THAT_EXPECTED(ObjNameOrErr, Succeeded()); + auto MutNameOrErr = MutObj.getName(); + ASSERT_THAT_EXPECTED(MutNameOrErr, Succeeded()); + EXPECT_EQ(*ObjNameOrErr, *MutNameOrErr); + + auto ObjAddrOrErr = ObjFile.getAddress(); + ASSERT_THAT_EXPECTED(ObjAddrOrErr, Succeeded()); + auto MutAddrOrErr = MutObj.getAddress(); + ASSERT_THAT_EXPECTED(MutAddrOrErr, Succeeded()); + EXPECT_EQ(*ObjAddrOrErr, *MutAddrOrErr); +
+ auto ObjTypeOrErr = ObjFile.getType(); + ASSERT_THAT_EXPECTED(ObjTypeOrErr, Succeeded()); + auto MutTypeOrErr = MutObj.getType(); + ASSERT_THAT_EXPECTED(MutTypeOrErr, Succeeded()); + EXPECT_EQ(*ObjTypeOrErr, *MutTypeOrErr); + }; + + for (const auto &Tuple : zip(ObjFile.symbols(), MutableObject.symbols())) + TestSymbols(std::get<0>(Tuple), std::get<1>(Tuple)); + + for (const SymbolRef &Sym : MutableObject.symbols()) { + auto MutSymOrErr = MutableObject.getMutableSymbol(Sym); + if (!MutSymOrErr) + consumeError(MutSymOrErr.takeError()); + } + + for (const auto &Tuple : zip(ObjFile.symbols(), MutableObject.symbols())) + TestSymbols(std::get<0>(Tuple), std::get<1>(Tuple)); +} + +// Test that a symbol can be mutated and the new name is seen via SymbolRef. +TEST(MutableELFObject, MutateSymbol) { + SmallString<0> Storage; + Expected> ErrOrObj = yaml2ObjectFile(Storage, R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Symbols: + - Name: test + Index: SHN_ABS + Value: 2 + Binding: STB_GLOBAL)"); + + ASSERT_THAT_EXPECTED(ErrOrObj, Succeeded()); + auto *ELFObjFile = dyn_cast>(ErrOrObj->get()); + ASSERT_TRUE(ELFObjFile); + MutableELFObject MutableObject(std::move(*ELFObjFile)); + + auto TestSym = symbol_iterator(++MutableObject.symbol_begin()); + Expected &> MutSymOrErr = + MutableObject.getMutableSymbol(TestSym); + ASSERT_THAT_EXPECTED(MutSymOrErr, Succeeded()); + EXPECT_EQ(MutSymOrErr->Header.st_value, 2u); + MutSymOrErr->Header.st_value = 5; + EXPECT_EQ(MutSymOrErr->Header.st_value, 5u); + + EXPECT_EQ(MutSymOrErr->Name, "test"); + MutSymOrErr->Name = "new_name"; + EXPECT_EQ(MutSymOrErr->Name, "new_name"); + TestSym = symbol_iterator(++MutableObject.symbol_begin()); + auto NameOrErr = TestSym->getName(); + ASSERT_THAT_EXPECTED(NameOrErr, Succeeded()); + EXPECT_EQ(*NameOrErr, "new_name"); +} + +// Test basic public methods on dynamic symbols.
+TEST(MutableELFObject, BasicDynSymb) { + SmallString<0> Storage; + Expected> ErrOrObj = yaml2ObjectFile(Storage, R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +DynamicSymbols: + - Name: test + Index: SHN_ABS + Value: 0x1234 + Binding: STB_GLOBAL)"); + + ASSERT_THAT_EXPECTED(ErrOrObj, Succeeded()); + auto *ELFObjFile = dyn_cast>(ErrOrObj->get()); + ASSERT_TRUE(ELFObjFile); + MutableELFObject MutableObject(std::move(*ELFObjFile)); + + auto DynSym = symbol_iterator(++MutableObject.dynamic_symbol_begin()); + EXPECT_EQ(DynSym->getValue(), 0x1234U); + EXPECT_TRUE(DynSym->getFlags() & SymbolRef::SF_Global); + auto NameOrErr = DynSym->getName(); + ASSERT_THAT_EXPECTED(NameOrErr, Succeeded()); + EXPECT_EQ(*NameOrErr, "test"); +} + +TEST(MutableELFObject, RemoveSymbols) { + SmallString<0> Storage; + Expected> ErrOrObj = yaml2ObjectFile(Storage, R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 +Symbols: + - Name: first + Index: SHN_ABS + Value: 0x1234 + Binding: STB_LOCAL + - Name: second + Index: SHN_ABS + Value: 0 + Binding: STB_GLOBAL)"); + + ASSERT_THAT_EXPECTED(ErrOrObj, Succeeded()); + auto *ELFObjFile = dyn_cast>(ErrOrObj->get()); + ASSERT_TRUE(ELFObjFile); + MutableELFObject MutableObject(std::move(*ELFObjFile)); + + auto Distance = + std::distance(MutableObject.symbol_begin(), MutableObject.symbol_end()); + EXPECT_EQ(Distance, 3); + + auto FirstSym = ++MutableObject.symbol_begin(); + auto NameOrErr = symbol_iterator(FirstSym)->getName(); + ASSERT_THAT_EXPECTED(NameOrErr, Succeeded()); + EXPECT_EQ(*NameOrErr, "first"); + MutableObject.removeSymbol(FirstSym); + + auto NewFirstSym = ++MutableObject.symbol_begin(); + auto NewNameOrErr = symbol_iterator(NewFirstSym)->getName(); + ASSERT_THAT_EXPECTED(NewNameOrErr, Succeeded()); + EXPECT_EQ(*NewNameOrErr, "second"); + + Distance = + std::distance(MutableObject.symbol_begin(), MutableObject.symbol_end()); + 
EXPECT_EQ(Distance, 2); +}