Skip to content

Commit 044776b

Browse files
committedJun 26, 2019
Add GSYM utility files along with unit tests.
The full GSYM patch started with: https://reviews.llvm.org/D53379 In that patch we wanted to split up getting GSYM into the LLVM code base so we are not committing too much code at once. This is the first in a series of patches where I only add the foundation classes along with complete unit tests. They provide the foundation for encoding and decoding a GSYM file. File entries are defined in llvm::gsym::FileEntry. This class splits the file up into a directory and filename represented by uniqued string table offsets. This allows all files that are referred to in a GSYM file to be encoded as 1-based indexes into a global file table in the GSYM file. Function information is stored in llvm::gsym::FunctionInfo. This object represents a contiguous address range that has a name and range with an optional line table and inline call stack information. Line table entries are defined in llvm::gsym::LineEntry. They store only address, file and line information to keep the line tables simple and allow the information to be efficiently encoded in a subsequent patch. Inline information is defined in llvm::gsym::InlineInfo. These structs store the name of the inline function, along with one or more address ranges, and the file and line that called this function. They also contain any child inline information. There are also utility classes for address ranges in llvm::gsym::AddressRange, and string table support in llvm::gsym::StringTable which are simple classes. The unit tests test all the APIs on these simple classes so they will be ready for the next patches where we will create GSYM files and parse GSYM files. Differential Revision: https://reviews.llvm.org/D63104 llvm-svn: 364427
1 parent 5f798f1 commit 044776b

File tree

14 files changed

+1047
-0
lines changed

14 files changed

+1047
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
//===- FileEntry.h ----------------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
11+
#define LLVM_DEBUGINFO_GSYM_FILEENTRY_H
12+
13+
#include "llvm/ADT/DenseMapInfo.h"
14+
#include "llvm/ADT/Hashing.h"
15+
#include <functional>
16+
#include <stdint.h>
17+
#include <utility>
18+
19+
namespace llvm {
20+
namespace gsym {
21+
22+
/// Files in GSYM are contained in FileEntry structs where we split the
/// directory and basename into two different strings in the string
/// table. This allows paths to share common directory and filename
/// strings and saves space.
struct FileEntry {

  /// Offsets in the string table.
  /// @{
  uint32_t Dir = 0;
  uint32_t Base = 0;
  /// @}

  /// Construct an entry whose directory and basename offsets are both zero.
  FileEntry() = default;

  /// Construct an entry from explicit string table offsets.
  ///
  /// \param D string table offset of the directory.
  /// \param B string table offset of the basename.
  FileEntry(uint32_t D, uint32_t B) : Dir(D), Base(B) {}

  // Implement operator== so that FileEntry can be used as key in
  // unordered containers.
  bool operator==(const FileEntry &RHS) const {
    return Base == RHS.Base && Dir == RHS.Dir;
  }

  /// Two entries differ when either the basename or the directory differs.
  bool operator!=(const FileEntry &RHS) const {
    return Base != RHS.Base || Dir != RHS.Dir;
  }
};
46+
47+
} // namespace gsym
48+
49+
template <> struct DenseMapInfo<gsym::FileEntry> {
50+
static inline gsym::FileEntry getEmptyKey() {
51+
const auto key = DenseMapInfo<uint32_t>::getEmptyKey();
52+
return gsym::FileEntry(key, key);
53+
54+
}
55+
static inline gsym::FileEntry getTombstoneKey() {
56+
const auto key = DenseMapInfo<uint32_t>::getTombstoneKey();
57+
return gsym::FileEntry(key, key);
58+
}
59+
static unsigned getHashValue(const gsym::FileEntry &Val) {
60+
return llvm::hash_combine(DenseMapInfo<uint32_t>::getHashValue(Val.Dir),
61+
DenseMapInfo<uint32_t>::getHashValue(Val.Base));
62+
}
63+
static bool isEqual(const gsym::FileEntry &LHS, const gsym::FileEntry &RHS) {
64+
return LHS == RHS;
65+
}
66+
};
67+
68+
} // namespace llvm
69+
#endif // #ifndef LLVM_DEBUGINFO_GSYM_FILEENTRY_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
//===- FunctionInfo.h -------------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
11+
#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
12+
13+
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
14+
#include "llvm/DebugInfo/GSYM/LineEntry.h"
15+
#include "llvm/DebugInfo/GSYM/Range.h"
16+
#include "llvm/DebugInfo/GSYM/StringTable.h"
17+
#include <tuple>
18+
#include <vector>
19+
20+
namespace llvm {
21+
class raw_ostream;
22+
namespace gsym {
23+
24+
/// Function information in GSYM files encodes information for one
25+
/// contiguous address range. The name of the function is encoded as
26+
/// a string table offset and allows multiple functions with the same
27+
/// name to share the name string in the string table. Line tables are
28+
/// stored in a sorted vector of gsym::LineEntry objects and are split
29+
/// into line tables for each function. If a function has a discontiguous
30+
/// range, it will be split into two gsym::FunctionInfo objects. If the
31+
/// function has inline functions, the information will be encoded in
32+
/// the "Inline" member, see gsym::InlineInfo for more information.
33+
struct FunctionInfo {
34+
AddressRange Range;
35+
uint32_t Name; ///< String table offset in the string table.
36+
std::vector<gsym::LineEntry> Lines;
37+
InlineInfo Inline;
38+
39+
FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
40+
: Range(Addr, Addr + Size), Name(N) {}
41+
42+
bool hasRichInfo() const {
43+
/// Returns whether we have something else than range and name. When
44+
/// converting information from a symbol table and from debug info, we
45+
/// might end up with multiple FunctionInfo objects for the same range
46+
/// and we need to be able to tell which one is the better object to use.
47+
return !Lines.empty() || Inline.isValid();
48+
}
49+
50+
bool isValid() const {
51+
/// Address and size can be zero and there can be no line entries for a
52+
/// symbol so the only indication this entry is valid is if the name is
53+
/// not zero. This can happen when extracting information from symbol
54+
/// tables that do not encode symbol sizes. In that case only the
55+
/// address and name will be filled in.
56+
return Name != 0;
57+
}
58+
59+
uint64_t startAddress() const { return Range.startAddress(); }
60+
uint64_t endAddress() const { return Range.endAddress(); }
61+
uint64_t size() const { return Range.size(); }
62+
void setStartAddress(uint64_t Addr) { Range.setStartAddress(Addr); }
63+
void setEndAddress(uint64_t Addr) { Range.setEndAddress(Addr); }
64+
void setSize(uint64_t Size) { Range.setSize(Size); }
65+
66+
void clear() {
67+
Range.clear();
68+
Name = 0;
69+
Lines.clear();
70+
Inline.clear();
71+
}
72+
};
73+
74+
/// Two FunctionInfo objects are equal when their range, name, line table and
/// inline information all compare equal.
inline bool operator==(const FunctionInfo &LHS, const FunctionInfo &RHS) {
  return std::tie(LHS.Range, LHS.Name, LHS.Lines, LHS.Inline) ==
         std::tie(RHS.Range, RHS.Name, RHS.Lines, RHS.Inline);
}
78+
/// Inequality is defined as the negation of operator== for FunctionInfo.
inline bool operator!=(const FunctionInfo &LHS, const FunctionInfo &RHS) {
  const bool Equal = (LHS == RHS);
  return !Equal;
}
81+
/// This sorting will order things consistently by address range first, but then
82+
/// followed by inlining being valid and line tables. We might end up with a
83+
/// FunctionInfo from debug info that will have the same range as one from the
84+
/// symbol table, but we want to quickly be able to sort and use the best version
85+
/// when creating the final GSYM file.
86+
inline bool operator<(const FunctionInfo &LHS, const FunctionInfo &RHS) {
87+
// First sort by address range
88+
if (LHS.Range != RHS.Range)
89+
return LHS.Range < RHS.Range;
90+
91+
// Then sort by inline
92+
if (LHS.Inline.isValid() != RHS.Inline.isValid())
93+
return RHS.Inline.isValid();
94+
95+
// If the number of lines is the same, then compare line table entries
96+
if (LHS.Lines.size() == RHS.Lines.size())
97+
return LHS.Lines < RHS.Lines;
98+
// Then sort by number of line table entries (more is better)
99+
return LHS.Lines.size() < RHS.Lines.size();
100+
}
101+
102+
/// Stream insertion for a FunctionInfo. Only declared here; the definition
/// (and therefore the exact output format) is not part of this header.
raw_ostream &operator<<(raw_ostream &OS, const FunctionInfo &R);
103+
104+
} // namespace gsym
105+
} // namespace llvm
106+
107+
#endif // #ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
//===- InlineInfo.h ---------------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
11+
#define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
12+
13+
#include "llvm/ADT/Optional.h"
14+
#include "llvm/DebugInfo/GSYM/Range.h"
15+
#include <stdint.h>
16+
#include <vector>
17+
18+
19+
namespace llvm {
20+
class raw_ostream;
21+
22+
namespace gsym {
23+
24+
/// Inline information stores the name of the inline function along with
25+
/// an array of address ranges. It also stores the call file and call line
26+
/// that called this inline function. This allows us to unwind inline call
27+
/// stacks back to the inline or concrete function that called this
28+
/// function. Inlined functions contained in this function are stored in the
29+
/// "Children" variable. All address ranges must be sorted and all address
30+
/// ranges of all children must be contained in the ranges of this function.
31+
/// Any clients that encode information will need to ensure the ranges are
32+
/// all contined correctly or lookups could fail. Add ranges in these objects
33+
/// must be contained in the top level FunctionInfo address ranges as well.
34+
struct InlineInfo {
35+
36+
uint32_t Name; ///< String table offset in the string table.
37+
uint32_t CallFile; ///< 1 based file index in the file table.
38+
uint32_t CallLine; ///< Source line number.
39+
AddressRanges Ranges;
40+
std::vector<InlineInfo> Children;
41+
InlineInfo() : Name(0), CallFile(0), CallLine(0) {}
42+
void clear() {
43+
Name = 0;
44+
CallFile = 0;
45+
CallLine = 0;
46+
Ranges.clear();
47+
Children.clear();
48+
}
49+
bool isValid() const { return !Ranges.empty(); }
50+
/// Lookup an address in the InlineInfo object
51+
///
52+
/// This function is used to symbolicate an inline call stack and can
53+
/// turn one address in the program into one or more inline call stacks
54+
/// and have the stack trace show the original call site from
55+
/// non-inlined code.
56+
///
57+
/// \param Addr the address to lookup
58+
/// \param InlineStack a vector of InlineInfo objects that describe the
59+
/// inline call stack for a given address.
60+
///
61+
/// \returns true if successful, false otherwise
62+
typedef std::vector<const InlineInfo *> InlineArray;
63+
llvm::Optional<InlineArray> getInlineStack(uint64_t Addr) const;
64+
};
65+
66+
/// Two InlineInfo objects are equal when the name, call file, call line,
/// address ranges and all children compare equal.
inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {
  // Compare the scalar fields first, in declaration order.
  if (LHS.Name != RHS.Name)
    return false;
  if (LHS.CallFile != RHS.CallFile)
    return false;
  if (LHS.CallLine != RHS.CallLine)
    return false;
  // Then the ranges, and finally the child inline information.
  if (!(LHS.Ranges == RHS.Ranges))
    return false;
  return LHS.Children == RHS.Children;
}
71+
72+
/// Stream insertion for an InlineInfo. Only declared here; the definition
/// (and therefore the exact output format) is not part of this header.
raw_ostream &operator<<(raw_ostream &OS, const InlineInfo &FI);
73+
74+
} // namespace gsym
75+
} // namespace llvm
76+
77+
#endif // #ifndef LLVM_DEBUGINFO_GSYM_INLINEINFO_H
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
//===- LineEntry.h ----------------------------------------------*- C++ -*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
#ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H
11+
#define LLVM_DEBUGINFO_GSYM_LINEENTRY_H
12+
13+
#include "llvm/DebugInfo/GSYM/Range.h"
14+
15+
namespace llvm {
16+
namespace gsym {
17+
18+
/// Line entries are used to encode the line tables in FunctionInfo objects.
/// They are stored as a sorted vector of these objects and store the
/// address, file and line of the line table row for a given address. The
/// size of a line table entry is calculated by looking at the next entry
/// in the FunctionInfo's vector of entries.
struct LineEntry {
  uint64_t Addr; ///< Start address of this line entry.
  uint32_t File; ///< 1 based index of file in FileTable
  uint32_t Line; ///< Source line number.

  /// Construct a line entry; every field defaults to zero.
  ///
  /// \param A start address of the entry.
  /// \param F 1 based file index.
  /// \param L source line number.
  LineEntry(uint64_t A = 0, uint32_t F = 0, uint32_t L = 0)
      : Addr(A), File(F), Line(L) {}

  /// An entry is valid when its file index is not zero. This is a const
  /// member so it can be called on const objects and through const
  /// references.
  bool isValid() const { return File != 0; }
};
31+
32+
/// Print a line entry as "addr=<HEX64 address>, file=<file>, line=<line>",
/// with the file and line formatted using "%3u".
inline raw_ostream &operator<<(raw_ostream &OS, const LineEntry &LE) {
  OS << "addr=" << HEX64(LE.Addr);
  OS << ", file=" << format("%3u", LE.File);
  OS << ", line=" << format("%3u", LE.Line);
  return OS;
}
36+
37+
/// Line entries are equal when the address, file index and line all match.
inline bool operator==(const LineEntry &LHS, const LineEntry &RHS) {
  if (LHS.Addr != RHS.Addr)
    return false;
  if (LHS.File != RHS.File)
    return false;
  return LHS.Line == RHS.Line;
}
40+
/// Inequality is defined as the negation of operator== for LineEntry.
inline bool operator!=(const LineEntry &LHS, const LineEntry &RHS) {
  const bool Equal = (LHS == RHS);
  return !Equal;
}
43+
/// Orders line entries by start address only; the file index and line number
/// do not take part in the ordering.
inline bool operator<(const LineEntry &LHS, const LineEntry &RHS) {
  return RHS.Addr > LHS.Addr;
}
46+
} // namespace gsym
47+
} // namespace llvm
48+
#endif // #ifndef LLVM_DEBUGINFO_GSYM_LINEENTRY_H

0 commit comments

Comments
 (0)