Skip to content

Commit 51d9fa0

Browse files
committedApr 5, 2019
Minidump: Add support for reading/writing strings
Summary: Strings in minidump files are stored as a 32-bit length field, giving the length of the string in *bytes*, which is followed by the appropriate number of UTF16 code units. The string is also supposed to be null-terminated, and the null-terminator is not a part of the length field. This patch: - adds support for reading these strings out of the minidump file (this implementation does not depend on proper null-termination) - adds support for writing them to a minidump file - using the previous two pieces implements proper (de)serialization of the CSDVersion field of the SystemInfo stream. Previously, this was only read/written as hex, and no attempt was made to access the referenced string -- now this string is read and written correctly. The changes are tested via yaml2obj|obj2yaml round-trip as well as a unit test which checks the corner cases of the string deserialization logic. Reviewers: jhenderson, zturner, clayborg Subscribers: llvm-commits, aprantl, markmentovai, amccarth, lldb-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59775 llvm-svn: 357749
1 parent 98edcd9 commit 51d9fa0

File tree

7 files changed

+114
-11
lines changed

7 files changed

+114
-11
lines changed
 

Diff for: ‎llvm/include/llvm/Object/Minidump.h

+4
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class MinidumpFile : public Binary {
4343
/// file does not contain a stream of this type.
4444
Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
4545

46+
/// Returns the minidump string at the given offset. An error is returned if
47+
/// we fail to parse the string, or the string is invalid UTF16.
48+
Expected<std::string> getString(size_t Offset) const;
49+
4650
/// Returns the contents of the SystemInfo stream, cast to the appropriate
4751
/// type. An error is returned if the file does not contain this stream, or
4852
/// the stream is smaller than the size of the SystemInfo structure. The

Diff for: ‎llvm/include/llvm/ObjectYAML/MinidumpYAML.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,12 @@ struct RawContentStream : public Stream {
6767
/// SystemInfo minidump stream.
6868
struct SystemInfoStream : public Stream {
6969
minidump::SystemInfo Info;
70+
std::string CSDVersion;
7071

71-
explicit SystemInfoStream(const minidump::SystemInfo &Info)
72+
explicit SystemInfoStream(const minidump::SystemInfo &Info,
73+
std::string CSDVersion)
7274
: Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
73-
Info(Info) {}
75+
Info(Info), CSDVersion(std::move(CSDVersion)) {}
7476

7577
SystemInfoStream()
7678
: Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {

Diff for: ‎llvm/lib/Object/Minidump.cpp

+28
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "llvm/Object/Minidump.h"
1010
#include "llvm/Object/Error.h"
11+
#include "llvm/Support/ConvertUTF.h"
1112

1213
using namespace llvm;
1314
using namespace llvm::object;
@@ -21,6 +22,33 @@ MinidumpFile::getRawStream(minidump::StreamType Type) const {
2122
return None;
2223
}
2324

25+
Expected<std::string> MinidumpFile::getString(size_t Offset) const {
26+
// Minidump strings consist of a 32-bit length field, which gives the size of
27+
// the string in *bytes*. This is followed by the actual string encoded in
28+
// UTF16.
29+
auto ExpectedSize =
30+
getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1);
31+
if (!ExpectedSize)
32+
return ExpectedSize.takeError();
33+
size_t Size = (*ExpectedSize)[0];
34+
if (Size % 2 != 0)
35+
return createError("String size not even");
36+
Size /= 2;
37+
if (Size == 0)
38+
return "";
39+
40+
Offset += sizeof(support::ulittle32_t);
41+
auto ExpectedData = getDataSliceAs<UTF16>(getData(), Offset, Size);
42+
if (!ExpectedData)
43+
return ExpectedData.takeError();
44+
45+
std::string Result;
46+
if (!convertUTF16ToUTF8String(*ExpectedData, Result))
47+
return createError("String decoding failed");
48+
49+
return Result;
50+
}
51+
2452
Expected<ArrayRef<uint8_t>>
2553
MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
2654
// Check for overflow.

Diff for: ‎llvm/lib/ObjectYAML/MinidumpYAML.cpp

+41-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "llvm/ObjectYAML/MinidumpYAML.h"
10+
#include "llvm/Support/ConvertUTF.h"
1011

1112
using namespace llvm;
1213
using namespace llvm::MinidumpYAML;
@@ -39,6 +40,8 @@ class BlobAllocator {
3940
return allocateArray(makeArrayRef(Data));
4041
}
4142

43+
size_t allocateString(StringRef Str);
44+
4245
void writeTo(raw_ostream &OS) const;
4346

4447
private:
@@ -48,6 +51,26 @@ class BlobAllocator {
4851
};
4952
} // namespace
5053

54+
size_t BlobAllocator::allocateString(StringRef Str) {
55+
SmallVector<UTF16, 32> WStr;
56+
bool OK = convertUTF8ToUTF16String(Str, WStr);
57+
assert(OK && "Invalid UTF8 in Str?");
58+
(void)OK;
59+
60+
SmallVector<support::ulittle16_t, 32> EndianStr(WStr.size() + 1,
61+
support::ulittle16_t());
62+
copy(WStr, EndianStr.begin());
63+
return allocateCallback(
64+
sizeof(uint32_t) + EndianStr.size() * sizeof(support::ulittle16_t),
65+
[EndianStr](raw_ostream &OS) {
66+
// Length does not include the null-terminator.
67+
support::ulittle32_t Length(2 * (EndianStr.size() - 1));
68+
OS.write(reinterpret_cast<const char *>(&Length), sizeof(Length));
69+
OS.write(reinterpret_cast<const char *>(EndianStr.begin()),
70+
sizeof(support::ulittle16_t) * EndianStr.size());
71+
});
72+
}
73+
5174
void BlobAllocator::writeTo(raw_ostream &OS) const {
5275
size_t BeginOffset = OS.tell();
5376
for (const auto &Callback : Callbacks)
@@ -269,7 +292,7 @@ static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
269292
mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
270293
mapOptional(IO, "Build Number", Info.BuildNumber, 0);
271294
IO.mapRequired("Platform ID", Info.PlatformId);
272-
mapOptionalHex(IO, "CSD Version RVA", Info.CSDVersionRVA, 0);
295+
IO.mapOptional("CSD Version", Stream.CSDVersion, "");
273296
mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
274297
mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
275298
switch (static_cast<ProcessorArchitecture>(Info.ProcessorArch)) {
@@ -337,6 +360,7 @@ static Directory layout(BlobAllocator &File, Stream &S) {
337360
Directory Result;
338361
Result.Type = S.Type;
339362
Result.Location.RVA = File.tell();
363+
Optional<size_t> DataEnd;
340364
switch (S.Kind) {
341365
case Stream::StreamKind::RawContent: {
342366
RawContentStream &Raw = cast<RawContentStream>(S);
@@ -347,14 +371,22 @@ static Directory layout(BlobAllocator &File, Stream &S) {
347371
});
348372
break;
349373
}
350-
case Stream::StreamKind::SystemInfo:
351-
File.allocateObject(cast<SystemInfoStream>(S).Info);
374+
case Stream::StreamKind::SystemInfo: {
375+
SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
376+
File.allocateObject(SystemInfo.Info);
377+
// The CSD string is not a part of the stream.
378+
DataEnd = File.tell();
379+
SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
352380
break;
381+
}
353382
case Stream::StreamKind::TextContent:
354383
File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
355384
break;
356385
}
357-
Result.Location.DataSize = File.tell() - Result.Location.RVA;
386+
// If DataEnd is not set, we assume everything we generated is a part of the
387+
// stream.
388+
Result.Location.DataSize =
389+
DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
358390
return Result;
359391
}
360392

@@ -395,7 +427,11 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
395427
auto ExpectedInfo = File.getSystemInfo();
396428
if (!ExpectedInfo)
397429
return ExpectedInfo.takeError();
398-
return make_unique<SystemInfoStream>(*ExpectedInfo);
430+
auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
431+
if (!ExpectedCSDVersion)
432+
return ExpectedInfo.takeError();
433+
return make_unique<SystemInfoStream>(*ExpectedInfo,
434+
std::move(*ExpectedCSDVersion));
399435
}
400436
case StreamKind::TextContent:
401437
return make_unique<TextContentStream>(

Diff for: ‎llvm/test/tools/obj2yaml/basic-minidump.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Streams:
55
- Type: SystemInfo
66
Processor Arch: ARM64
77
Platform ID: Linux
8-
CSD Version RVA: 0x01020304
8+
CSD Version: Linux 3.13.0-91-generic
99
CPU:
1010
CPUID: 0x05060708
1111
- Type: LinuxAuxv
@@ -22,7 +22,7 @@ Streams:
2222
# CHECK-NEXT: - Type: SystemInfo
2323
# CHECK-NEXT: Processor Arch: ARM64
2424
# CHECK-NEXT: Platform ID: Linux
25-
# CHECK-NEXT: CSD Version RVA: 0x01020304
25+
# CHECK-NEXT: CSD Version: Linux 3.13.0-91-generic
2626
# CHECK-NEXT: CPU:
2727
# CHECK-NEXT: CPUID: 0x05060708
2828
# CHECK-NEXT: - Type: LinuxAuxv

Diff for: ‎llvm/unittests/Object/MinidumpTest.cpp

+35
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,38 @@ TEST(MinidumpFile, getSystemInfo) {
249249
EXPECT_EQ(0x08070605u, Info.CPU.X86.FeatureInfo);
250250
EXPECT_EQ(0x02010009u, Info.CPU.X86.AMDExtendedFeatures);
251251
}
252+
253+
TEST(MinidumpFile, getString) {
254+
std::vector<uint8_t> ManyStrings{
255+
// Header
256+
'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version
257+
2, 0, 0, 0, // NumberOfStreams,
258+
0x20, 0, 0, 0, // StreamDirectoryRVA
259+
0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp
260+
8, 9, 0, 1, 2, 3, 4, 5, // Flags
261+
// Stream Directory
262+
0, 0, 0, 0, 0, 0, 0, 0, // Type, DataSize,
263+
0x20, 0, 0, 0, // RVA
264+
1, 0, 0, 0, 0, 0, // String1 - odd length
265+
0, 0, 1, 0, 0, 0, // String2 - too long
266+
2, 0, 0, 0, 0, 0xd8, // String3 - invalid utf16
267+
0, 0, 0, 0, 0, 0, // String4 - ""
268+
2, 0, 0, 0, 'a', 0, // String5 - "a"
269+
0, // Mis-align next string
270+
2, 0, 0, 0, 'a', 0, // String6 - "a"
271+
272+
};
273+
auto ExpectedFile = create(ManyStrings);
274+
ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded());
275+
const MinidumpFile &File = **ExpectedFile;
276+
EXPECT_THAT_EXPECTED(File.getString(44), Failed<BinaryError>());
277+
EXPECT_THAT_EXPECTED(File.getString(50), Failed<BinaryError>());
278+
EXPECT_THAT_EXPECTED(File.getString(56), Failed<BinaryError>());
279+
EXPECT_THAT_EXPECTED(File.getString(62), HasValue(""));
280+
EXPECT_THAT_EXPECTED(File.getString(68), HasValue("a"));
281+
EXPECT_THAT_EXPECTED(File.getString(75), HasValue("a"));
282+
283+
// Check the case when the size field does not fit into the remaining data.
284+
EXPECT_THAT_EXPECTED(File.getString(ManyStrings.size() - 2),
285+
Failed<BinaryError>());
286+
}

Diff for: ‎llvm/unittests/ObjectYAML/MinidumpYAMLTest.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ TEST(MinidumpYAML, Basic) {
3333
- Type: SystemInfo
3434
Processor Arch: ARM64
3535
Platform ID: Linux
36-
CSD Version RVA: 0x01020304
3736
CPU:
3837
CPUID: 0x05060708
3938
- Type: LinuxMaps
@@ -54,7 +53,6 @@ TEST(MinidumpYAML, Basic) {
5453
const SystemInfo &SysInfo = *ExpectedSysInfo;
5554
EXPECT_EQ(ProcessorArchitecture::ARM64, SysInfo.ProcessorArch);
5655
EXPECT_EQ(OSPlatform::Linux, SysInfo.PlatformId);
57-
EXPECT_EQ(0x01020304u, SysInfo.CSDVersionRVA);
5856
EXPECT_EQ(0x05060708u, SysInfo.CPU.Arm.CPUID);
5957

6058
EXPECT_EQ(StreamType::LinuxMaps, File.streams()[1].Type);

0 commit comments

Comments
 (0)
Please sign in to comment.