Changeset View
Standalone View
llvm/include/llvm/Object/ELF.h
//===- ELF.h - ELF object file implementation -------------------*- C++ -*-===// | //===- ELF.h - ELF object file implementation -------------------*- C++ -*-===// | ||||||||
// | // | ||||||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
// See https://llvm.org/LICENSE.txt for license information. | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
// | // | ||||||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||||||
// | // | ||||||||
// This file declares the ELFFile template class. | // This file declares the ELFFile template class. | ||||||||
// | // | ||||||||
//===----------------------------------------------------------------------===// | //===----------------------------------------------------------------------===// | ||||||||
#ifndef LLVM_OBJECT_ELF_H | #ifndef LLVM_OBJECT_ELF_H | ||||||||
#define LLVM_OBJECT_ELF_H | #define LLVM_OBJECT_ELF_H | ||||||||
#include "llvm/ADT/ArrayRef.h" | #include "llvm/ADT/ArrayRef.h" | ||||||||
#include "llvm/ADT/SmallString.h" | |||||||||
#include "llvm/ADT/SmallVector.h" | #include "llvm/ADT/SmallVector.h" | ||||||||
#include "llvm/ADT/StringRef.h" | #include "llvm/ADT/StringRef.h" | ||||||||
#include "llvm/BinaryFormat/ELF.h" | #include "llvm/BinaryFormat/ELF.h" | ||||||||
#include "llvm/MC/StringTableBuilder.h" | |||||||||
#include "llvm/Object/ELFTypes.h" | #include "llvm/Object/ELFTypes.h" | ||||||||
#include "llvm/Object/Error.h" | #include "llvm/Object/Error.h" | ||||||||
#include "llvm/Support/Endian.h" | #include "llvm/Support/Endian.h" | ||||||||
#include "llvm/Support/Error.h" | #include "llvm/Support/Error.h" | ||||||||
#include "llvm/Support/raw_ostream.h" | |||||||||
#include <cassert> | #include <cassert> | ||||||||
#include <cstddef> | #include <cstddef> | ||||||||
#include <cstdint> | #include <cstdint> | ||||||||
#include <limits> | #include <limits> | ||||||||
#include <utility> | #include <utility> | ||||||||
namespace llvm { | namespace llvm { | ||||||||
namespace object { | namespace object { | ||||||||
▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines | public: | ||||||||
const uint8_t *base() const { return Buf.bytes_begin(); } | const uint8_t *base() const { return Buf.bytes_begin(); } | ||||||||
const uint8_t *end() const { return base() + getBufSize(); } | const uint8_t *end() const { return base() + getBufSize(); } | ||||||||
size_t getBufSize() const { return Buf.size(); } | size_t getBufSize() const { return Buf.size(); } | ||||||||
private: | private: | ||||||||
StringRef Buf; | StringRef Buf; | ||||||||
std::vector<Elf_Shdr> FakeSections; | std::vector<Elf_Shdr> FakeSections; | ||||||||
SmallString<0> FakeSectionStrings; | |||||||||
MaskRay: Use `SmallString<0> FakeSectionStrings`.
Prefer `SmallString` because `std::string` has a… | |||||||||
But I think it'd be better to use StringTableBuilder as @jhenderson said. namhyung: But I think it'd be better to use StringTableBuilder as @jhenderson said. | |||||||||
Not Done ReplyInline ActionsI think StringTableBuilder just adds complexity/overhead in this case. StringTableBuilder cannot deduplicate any string here (and the code does not need to optimize for size). MaskRay: I think `StringTableBuilder` just adds complexity/overhead in this case. `StringTableBuilder`… | |||||||||
Not Done ReplyInline ActionsI disagree that it adds complexity. It seems more intuitive to do StrTab.add(someString) than std::copy(someString.begin(), someString.end(), std::back_inserter(StrTab)); followed by a null terminator addition. I do agree that the optimizations are unnecessary. Maybe there's room for a "simple" string builder class that does none of the deduplication/optimization? jhenderson: I disagree that it adds complexity. It seems more intuitive to do `StrTab.add(someString)` than… | |||||||||
Not Done ReplyInline ActionsI agree with @jhenderson. The API in the StringTableBuilder is simpler and more intuitive to me. Also finalizeInOrder() is even not trying to optimize it. namhyung: I agree with @jhenderson. The API in the StringTableBuilder is simpler and more intuitive to… | |||||||||
ELFFile(StringRef Object); | ELFFile(StringRef Object); | ||||||||
public: | public: | ||||||||
const Elf_Ehdr &getHeader() const { | const Elf_Ehdr &getHeader() const { | ||||||||
return *reinterpret_cast<const Elf_Ehdr *>(base()); | return *reinterpret_cast<const Elf_Ehdr *>(base()); | ||||||||
} | } | ||||||||
▲ Show 20 Lines • Show All 450 Lines • ▼ Show 20 Lines | if (Index == ELF::SHN_XINDEX) { | ||||||||
// header at index 0. | // header at index 0. | ||||||||
if (Sections.empty()) | if (Sections.empty()) | ||||||||
return createError( | return createError( | ||||||||
"e_shstrndx == SHN_XINDEX, but the section header table is empty"); | "e_shstrndx == SHN_XINDEX, but the section header table is empty"); | ||||||||
Index = Sections[0].sh_link; | Index = Sections[0].sh_link; | ||||||||
} | } | ||||||||
if (!Index) // no section string table. | // There is no section name string table. Return FakeSectionStrings which | ||||||||
Not Done ReplyInline Actions// There is no section name string table. Return FakeSectionStrings which is non-empty if we have created fake sections. MaskRay: `// There is no section name string table. Return FakeSectionStrings which is non-empty if we… | |||||||||
Not Done ReplyInline Actionsjhenderson: @namhyung, I believe @MaskRay was suggesting using the comment he's posted instead of "no… | |||||||||
Oh, I overlooked that, sorry. Will update. namhyung: Oh, I overlooked that, sorry. Will update. | |||||||||
return ""; | // is non-empty if we have created fake sections. | ||||||||
Not Done ReplyInline ActionsJust return FakeSectionStrings; and delete return "" MaskRay: Just return `FakeSectionStrings;` and delete `return ""` | |||||||||
if (!Index) | |||||||||
return FakeSectionStrings; | |||||||||
if (Index >= Sections.size()) | if (Index >= Sections.size()) | ||||||||
return createError("section header string table index " + Twine(Index) + | return createError("section header string table index " + Twine(Index) + | ||||||||
" does not exist"); | " does not exist"); | ||||||||
return getStringTable(Sections[Index], WarnHandler); | return getStringTable(Sections[Index], WarnHandler); | ||||||||
} | } | ||||||||
/// This function finds the number of dynamic symbols using a GNU hash table. | /// This function finds the number of dynamic symbols using a GNU hash table. | ||||||||
/// | /// | ||||||||
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | |||||||||
/// analyzed by linux perf or ET_EXEC with llvm-strip --strip-sections). | /// analyzed by linux perf or ET_EXEC with llvm-strip --strip-sections). | ||||||||
template <class ELFT> void ELFFile<ELFT>::createFakeSections() { | template <class ELFT> void ELFFile<ELFT>::createFakeSections() { | ||||||||
if (!FakeSections.empty()) | if (!FakeSections.empty()) | ||||||||
return; | return; | ||||||||
auto PhdrsOrErr = program_headers(); | auto PhdrsOrErr = program_headers(); | ||||||||
if (!PhdrsOrErr) | if (!PhdrsOrErr) | ||||||||
return; | return; | ||||||||
size_t Idx = 0; | |||||||||
Not Done ReplyInline ActionsThis probably wants to be a size_t since it's an index. jhenderson: This probably wants to be a size_t since it's an index. | |||||||||
Ok. namhyung: Ok. | |||||||||
std::vector<SmallString<10>> SecNames; | |||||||||
Not Done ReplyInline ActionsNot sure you need the llvm:: prefixes? jhenderson: Not sure you need the `llvm::` prefixes? | |||||||||
It seems not. Will remove. namhyung: It seems not. Will remove. | |||||||||
StringTableBuilder StrTab(StringTableBuilder::ELF); | |||||||||
for (auto Phdr : *PhdrsOrErr) { | for (auto Phdr : *PhdrsOrErr) { | ||||||||
MaskRayUnsubmitted Not Done ReplyInline Actionsfor (auto [Idx, Phdr] : llvm::enumerate(*PhdrsOrErr)) MaskRay: `for (auto [Idx, Phdr] : llvm::enumerate(*PhdrsOrErr))` | |||||||||
if (!(Phdr.p_type & ELF::PT_LOAD) || !(Phdr.p_flags & ELF::PF_X)) | if (Phdr.p_type != ELF::PT_LOAD || !(Phdr.p_flags & ELF::PF_X)) { | ||||||||
++Idx; | |||||||||
I've found a bug in this code - it should check equality. namhyung: I've found a bug in this code - it should check equality. | |||||||||
Not Done ReplyInline ActionsThe convention does not place parens around != MaskRay: The convention does not place parens around `!=` | |||||||||
Yeah, but I thought it'd good to be symmetric to the RHS. namhyung: Yeah, but I thought it'd good to be symmetric to the RHS. | |||||||||
Not Done ReplyInline ActionsHaving the "symmetry" has the potential to actually cause confusion, since the LHS isn't negated unlike the RHS. jhenderson: Having the "symmetry" has the potential to actually cause confusion, since the LHS isn't… | |||||||||
Not Done ReplyInline ActionsDo you actually need this? jhenderson: Do you actually need this? | |||||||||
Yep, Idx is a part of the section name. namhyung: Yep, Idx is a part of the section name. | |||||||||
Not Done ReplyInline ActionsI wasn't referring to MaskRay's suggestion. I was referring to the line that's actually highlighted with this comment (i.e. line 778 in this diff). jhenderson: I wasn't referring to MaskRay's suggestion. I was referring to the line that's actually… | |||||||||
Oh, I see. Maybe it's not needed but I think it's a good practice to maintain offset 0 as an empty string. Do you want me to remove it? namhyung: Oh, I see. Maybe it's not needed but I think it's a good practice to maintain offset 0 as an… | |||||||||
Not Done ReplyInline ActionsHaving come back to this a day later, I think it's wise to keep it, as we don't want to risk there being code that assumes sh_name == '\0' means no name. jhenderson: Having come back to this a day later, I think it's wise to keep it, as we don't want to risk… | |||||||||
Not Done ReplyInline ActionsFakeSectionStrings += '\0' or use push_back. MaskRay: `FakeSectionStrings += '\0'` or use push_back. | |||||||||
continue; | continue; | ||||||||
} | |||||||||
Elf_Shdr FakeShdr = {}; | Elf_Shdr FakeShdr = {}; | ||||||||
FakeShdr.sh_type = ELF::SHT_PROGBITS; | FakeShdr.sh_type = ELF::SHT_PROGBITS; | ||||||||
FakeShdr.sh_flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; | FakeShdr.sh_flags = ELF::SHF_ALLOC | ELF::SHF_EXECINSTR; | ||||||||
FakeShdr.sh_addr = Phdr.p_vaddr; | FakeShdr.sh_addr = Phdr.p_vaddr; | ||||||||
FakeShdr.sh_size = Phdr.p_memsz; | FakeShdr.sh_size = Phdr.p_memsz; | ||||||||
FakeShdr.sh_offset = Phdr.p_offset; | FakeShdr.sh_offset = Phdr.p_offset; | ||||||||
// Create a section name based on the p_type and index. | |||||||||
Not Done ReplyInline Actions
jhenderson: | |||||||||
SecNames.push_back(StringRef("PT_LOAD#" + std::to_string(Idx))); | |||||||||
jhendersonUnsubmitted Not Done ReplyInline ActionsI'm pretty sure this isn't going to work as intended in many cases: you create a StringRef to a std::string temporary, and stick that in SecNames. It's likely that the StringRef will be pointing at invalid memory immediately after its construction (it's likely just working because nothing has scribbled over the data in memory yet). Could you not just populate SecNames with the result of the "PT_LOAD#" + std::to_string(Idx) expression directly? If the StringTableBuilder contains StringRefs to the strings, using a std::vector will potentially invalidate them, if it needs to increase its capacity, right? We probably want a different strategy here. Probably simplest is simply to give the vector an initial capacity equal to the total number of program headers. Given that there are rarely that many, this shouldn't have any significant impact on performance. jhenderson: I'm pretty sure this isn't going to work as intended in many cases: you create a StringRef to a… | |||||||||
MaskRayUnsubmitted Not Done ReplyInline ActionsThanks for catching this. It is a code smell but it actually works: "PT_LOAD#" + std::to_string(Idx) returns a std::string. It works as the the lifetime of the temporary object std::string applies to the full-expression. The following should be better: SmallVector<std::string, 0> SecNames; ... SecNames.push_back(("PT_LOAD#" + Twine(Idx)).str()); // An alternative is ("PT_LOAD#" + Twine(Idx)).toVector(Name); but a local `SmallString` variable is needed MaskRay: Thanks for catching this. It is a code smell but it actually works: `"PT_LOAD#" + std… | |||||||||
namhyungAuthorUnsubmitted
I don't think so. The StringRef was needed because the constructor of SmallString takes it only. I wanted to pass std::string directly but it's not supported IIUC. After it creates a SmallString, I don't use the StringRef for a temp string anymore. Instead it uses the SmallString data in the vector, that's why I passed SecNames.back(). I believe it's safe to refer the data even after vector is resized. namhyung: > I'm pretty sure this isn't going to work as intended in many cases: you create a StringRef to… | |||||||||
FakeShdr.sh_name = StrTab.add(SecNames.back()); | |||||||||
FakeSections.push_back(FakeShdr); | FakeSections.push_back(FakeShdr); | ||||||||
++Idx; | |||||||||
} | } | ||||||||
StrTab.finalizeInOrder(); | |||||||||
raw_svector_ostream Strs(FakeSectionStrings); | |||||||||
StrTab.write(Strs); | |||||||||
} | } | ||||||||
template <class ELFT> | template <class ELFT> | ||||||||
Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { | Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { | ||||||||
const uintX_t SectionTableOffset = getHeader().e_shoff; | const uintX_t SectionTableOffset = getHeader().e_shoff; | ||||||||
if (SectionTableOffset == 0) { | if (SectionTableOffset == 0) { | ||||||||
if (!FakeSections.empty()) | if (!FakeSections.empty()) | ||||||||
return makeArrayRef(FakeSections.data(), FakeSections.size()); | return makeArrayRef(FakeSections.data(), FakeSections.size()); | ||||||||
▲ Show 20 Lines • Show All 438 Lines • Show Last 20 Lines |
Use SmallString<0> FakeSectionStrings.
Prefer SmallString because std::string has a small string optimization which isn't useful for this case.