Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Standalone View
source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp
Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | return llvm::StringSwitch<Triple::ArchType>(str) | ||||
.Case("s390", Triple::systemz) | .Case("s390", Triple::systemz) | ||||
.Case("sparc", Triple::sparc) | .Case("sparc", Triple::sparc) | ||||
.Case("sparcv9", Triple::sparcv9) | .Case("sparcv9", Triple::sparcv9) | ||||
.Case("x86", Triple::x86) | .Case("x86", Triple::x86) | ||||
.Case("x86_64", Triple::x86_64) | .Case("x86_64", Triple::x86_64) | ||||
.Default(Triple::UnknownArch); | .Default(Triple::UnknownArch); | ||||
} | } | ||||
static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) { | /// Return the number of hex digits needed to encode an (POD) object of a given | ||||
llvm::StringRef result = str.take_front(n); | /// type. | ||||
str = str.drop_front(n); | template <typename T> static constexpr size_t hex_digits() { | ||||
return result; | return 2 * sizeof(T); | ||||
} | |||||
/// Consume the right number of digits from the input StringRef and convert it | |||||
/// to the endian-specific integer N. Return true on success. | |||||
template <typename T> static bool consume_integer(llvm::StringRef &str, T &N) { | |||||
clayborg: rename to "consume_hex_integer" or an extra parameter for the base instead of hard coding to 16? | |||||
labath (author): I'll rename the function before submitting. | |||||
llvm::StringRef chunk = str.take_front(hex_digits<T>()); | |||||
uintmax_t t; | |||||
lemo: = 0; ? | |||||
labath (author): That is not necessary, as to_integer initializes it. Perhaps more importantly, not initializing this allows tools like msan and valgrind to actually detect the cases when you end up using an uninitialized value. | |||||
if (!to_integer(chunk, t, 16)) | |||||
return false; | |||||
N = t; | |||||
str = str.drop_front(hex_digits<T>()); | |||||
return true; | |||||
} | } | ||||
static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { | static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { | ||||
struct uuid_data { | struct data_t { | ||||
llvm::support::ulittle32_t uuid1; | struct uuid_t { | ||||
llvm::support::ulittle16_t uuid2[2]; | llvm::support::ulittle32_t part1; | ||||
uint8_t uuid3[8]; | llvm::support::ulittle16_t part2[2]; | ||||
uint8_t part3[8]; | |||||
} uuid; | |||||
llvm::support::ulittle32_t age; | llvm::support::ulittle32_t age; | ||||
} data; | } data; | ||||
static_assert(sizeof(data) == 20, ""); | static_assert(sizeof(data) == 20, ""); | ||||
// The textual module id encoding should be between 33 and 40 bytes long, | // The textual module id encoding should be between 33 and 40 bytes long, | ||||
// depending on the size of the age field, which is of variable length. | // depending on the size of the age field, which is of variable length. | ||||
// The first three chunks of the id are encoded in big endian, so we need to | // The first three chunks of the id are encoded in big endian, so we need to | ||||
// byte-swap those. | // byte-swap those. | ||||
if (str.size() < 33 || str.size() > 40) | if (str.size() <= hex_digits<data_t::uuid_t>() || | ||||
str.size() > hex_digits<data_t>()) | |||||
return UUID(); | return UUID(); | ||||
uint32_t t; | if (!consume_integer(str, data.uuid.part1)) | ||||
if (to_integer(consume_front(str, 8), t, 16)) | |||||
data.uuid1 = t; | |||||
else | |||||
return UUID(); | return UUID(); | ||||
for (int i = 0; i < 2; ++i) { | for (auto &t : data.uuid.part2) { | ||||
if (to_integer(consume_front(str, 4), t, 16)) | if (!consume_integer(str, t)) | ||||
clayborg: This is OK as long as the UUIDs for ELF don't fall into this category. I am able to match up UUIDs for ELF just fine for breakpad files for Android. | |||||
labath (author): Normally on Linux you should always have the INFO record, which will have the unmangled UUID, and which we will give preference to if it is available. If for some reason we don't find an INFO record, then we will use the version from the MODULE record, which we will manually unmangle. But that should be the right thing to do as it matches what the breakpad generator does. (You can see this by looking at a file which has both of these records -- they will differ in that the first one will be mangled and will have an extra zero at the end.) | |||||
data.uuid2[i] = t; | |||||
else | |||||
return UUID(); | return UUID(); | ||||
} | } | ||||
for (int i = 0; i < 8; ++i) { | for (auto &t : data.uuid.part3) { | ||||
if (!to_integer(consume_front(str, 2), data.uuid3[i], 16)) | if (!consume_integer(str, t)) | ||||
return UUID(); | return UUID(); | ||||
} | } | ||||
if (to_integer(str, t, 16)) | uint32_t age; | ||||
data.age = t; | if (!to_integer(str, age, 16)) | ||||
else | |||||
return UUID(); | return UUID(); | ||||
data.age = age; | |||||
// On non-windows, the age field should always be zero, so we don't include | // On non-windows, the age field should always be zero, so we don't include | ||||
// it, to match the native uuid format of these platforms. | // it, to match the native uuid format of these platforms. | ||||
return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16); | return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data) | ||||
: sizeof(data.uuid)); | |||||
clayborg: For Apple platforms Breakpad actually incorrectly byte swaps the first 32 bits and the next two 16 bit values. I have a patch for this, but since this is moving, it would be great to get that fix in here. Also, many Linux breakpad files have the UUID field present but set to all zeroes, which is bad, as the UUID parsing code will cause multiple modules to claim that a UUID of all zeroes is valid, and causes all modules with such a UUID to just use the first one it finds. The code I have in my unsubmitted patch is:
    if (pdb70_uuid->Age == 0) {
      bool all_zeroes = true;
      for (size_t i=0; all_zeroes && i<sizeof(pdb70_uuid->Uuid); ++i)
        all_zeroes = pdb70_uuid->Uuid[i] == 0;
      // Many times UUIDs are not filled in at all, so avoid claiming that
      // all such libraries have a valid UUID that is all zeroes.
      if (all_zeroes)
        return UUID();
      if (arch.GetTriple().getVendor() == llvm::Triple::Apple) {
        // Breakpad incorrectly byte swaps the first 32 bit and next 2 16 bit
        // values in the UUID field. Undo this so we can match things up
        // with our symbol files
        uint8_t apple_uuid[16];
        // Byte swap the first 32 bits
        apple_uuid[0] = pdb70_uuid->Uuid[3];
        apple_uuid[1] = pdb70_uuid->Uuid[2];
        apple_uuid[2] = pdb70_uuid->Uuid[1];
        apple_uuid[3] = pdb70_uuid->Uuid[0];
        // Byte swap the next 16 bit value
        apple_uuid[4] = pdb70_uuid->Uuid[5];
        apple_uuid[5] = pdb70_uuid->Uuid[4];
        // Byte swap the next 16 bit value
        apple_uuid[6] = pdb70_uuid->Uuid[7];
        apple_uuid[7] = pdb70_uuid->Uuid[6];
        for (size_t i=8; i<sizeof(pdb70_uuid->Uuid); ++i)
          apple_uuid[i] = pdb70_uuid->Uuid[i];
        return UUID::fromData(apple_uuid, sizeof(apple_uuid));
      } else
        return UUID::fromData(pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid));
    } else
      return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid)); | |||||
labath (author): The byte swapping is already handled in this code (that's why it's this complicated). My impression was that the swapping is not Apple-specific, but rather depends on where you read the UUID from (the MODULE record has it swapped, the INFO record doesn't). Though that may depend on how we choose to interpret the raw bytes in other UUID sources (object files, pdb files, ...). As for the all-zero case, that should be easily fixable by changing fromData to fromOptionalData, but I'm surprised that this is necessary, as I was under the impression that breakpad invents its own UUIDs in case the original object file doesn't have them. (This would actually be the better case, as otherwise we'd have to figure out how to tell the fictional and non-fictional UUIDs apart.) @lemo: Can you shed any light on this? | |||||
} | } | ||||
Record::Kind Record::classify(llvm::StringRef Line) { | Record::Kind Record::classify(llvm::StringRef Line) { | ||||
Token Tok = toToken(getToken(Line).first); | Token Tok = toToken(getToken(Line).first); | ||||
switch (Tok) { | switch (Tok) { | ||||
case Token::Module: | case Token::Module: | ||||
return Record::Module; | return Record::Module; | ||||
case Token::Info: | case Token::Info: | ||||
Show All 38 Lines | llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { | ||||
std::tie(Str, Line) = getToken(Line); | std::tie(Str, Line) = getToken(Line); | ||||
UUID ID = parseModuleId(OS, Str); | UUID ID = parseModuleId(OS, Str); | ||||
if (!ID) | if (!ID) | ||||
return llvm::None; | return llvm::None; | ||||
return ModuleRecord(OS, Arch, std::move(ID)); | return ModuleRecord(OS, Arch, std::move(ID)); | ||||
} | } | ||||
bool breakpad::operator==(const ModuleRecord &L, const ModuleRecord &R) { | |||||
return L.getOS() == R.getOS() && L.getArch() == R.getArch() && | |||||
L.getID() == R.getID(); | |||||
} | |||||
llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | ||||
const ModuleRecord &R) { | const ModuleRecord &R) { | ||||
return OS << "MODULE " << llvm::Triple::getOSTypeName(R.getOS()) << " " | return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " | ||||
<< llvm::Triple::getArchTypeName(R.getArch()) << " " | << llvm::Triple::getArchTypeName(R.Arch) << " " | ||||
<< R.getID().GetAsString(); | << R.ID.GetAsString(); | ||||
} | } | ||||
llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { | llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { | ||||
// INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] | // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] | ||||
llvm::StringRef Str; | llvm::StringRef Str; | ||||
std::tie(Str, Line) = getToken(Line); | std::tie(Str, Line) = getToken(Line); | ||||
if (toToken(Str) != Token::Info) | if (toToken(Str) != Token::Info) | ||||
return llvm::None; | return llvm::None; | ||||
Show All 10 Lines | if (Line.trim().empty()) { | ||||
if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size()) | if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size()) | ||||
return llvm::None; | return llvm::None; | ||||
} | } | ||||
return InfoRecord(std::move(ID)); | return InfoRecord(std::move(ID)); | ||||
} | } | ||||
llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | ||||
const InfoRecord &R) { | const InfoRecord &R) { | ||||
return OS << "INFO CODE_ID " << R.getID().GetAsString(); | return OS << "INFO CODE_ID " << R.ID.GetAsString(); | ||||
} | } | ||||
static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, | static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, | ||||
lldb::addr_t &Address, lldb::addr_t *Size, | lldb::addr_t &Address, lldb::addr_t *Size, | ||||
lldb::addr_t &ParamSize, llvm::StringRef &Name) { | lldb::addr_t &ParamSize, llvm::StringRef &Name) { | ||||
// PUBLIC [m] address param_size name | // PUBLIC [m] address param_size name | ||||
// or | // or | ||||
// FUNC [m] address size param_size name | // FUNC [m] address size param_size name | ||||
Show All 37 Lines | llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { | ||||
if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) | if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) | ||||
return FuncRecord(Multiple, Address, Size, ParamSize, Name); | return FuncRecord(Multiple, Address, Size, ParamSize, Name); | ||||
return llvm::None; | return llvm::None; | ||||
} | } | ||||
bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { | bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { | ||||
return L.getMultiple() == R.getMultiple() && | return L.Multiple == R.Multiple && L.Address == R.Address && | ||||
L.getAddress() == R.getAddress() && L.getSize() == R.getSize() && | L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; | ||||
L.getParamSize() == R.getParamSize() && L.getName() == R.getName(); | |||||
} | } | ||||
llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | ||||
const FuncRecord &R) { | const FuncRecord &R) { | ||||
return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", | return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", | ||||
R.getMultiple() ? "m " : "", R.getAddress(), | R.Multiple ? "m " : "", R.Address, R.Size, | ||||
R.getSize(), R.getParamSize(), R.getName()); | R.ParamSize, R.Name); | ||||
} | } | ||||
llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { | llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { | ||||
bool Multiple; | bool Multiple; | ||||
lldb::addr_t Address, ParamSize; | lldb::addr_t Address, ParamSize; | ||||
llvm::StringRef Name; | llvm::StringRef Name; | ||||
if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) | if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) | ||||
return PublicRecord(Multiple, Address, ParamSize, Name); | return PublicRecord(Multiple, Address, ParamSize, Name); | ||||
return llvm::None; | return llvm::None; | ||||
} | } | ||||
bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { | bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { | ||||
return L.getMultiple() == R.getMultiple() && | return L.Multiple == R.Multiple && L.Address == R.Address && | ||||
L.getAddress() == R.getAddress() && | L.ParamSize == R.ParamSize && L.Name == R.Name; | ||||
L.getParamSize() == R.getParamSize() && L.getName() == R.getName(); | |||||
} | } | ||||
llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, | ||||
const PublicRecord &R) { | const PublicRecord &R) { | ||||
return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", | return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", | ||||
R.getMultiple() ? "m " : "", R.getAddress(), | R.Multiple ? "m " : "", R.Address, R.ParamSize, | ||||
R.getParamSize(), R.getName()); | R.Name); | ||||
} | } | ||||
llvm::StringRef breakpad::toString(Record::Kind K) { | llvm::StringRef breakpad::toString(Record::Kind K) { | ||||
switch (K) { | switch (K) { | ||||
case Record::Module: | case Record::Module: | ||||
return "MODULE"; | return "MODULE"; | ||||
case Record::Info: | case Record::Info: | ||||
return "INFO"; | return "INFO"; | ||||
Show All 13 Lines |
rename to "consume_hex_integer" or an extra parameter for the base instead of hard coding to 16?