Changeset View
Standalone View
lld/COFF/PDB.cpp
Show All 11 Lines | |||||
#include "Config.h" | #include "Config.h" | ||||
#include "Error.h" | #include "Error.h" | ||||
#include "SymbolTable.h" | #include "SymbolTable.h" | ||||
#include "Symbols.h" | #include "Symbols.h" | ||||
#include "Writer.h" | #include "Writer.h" | ||||
#include "llvm/DebugInfo/CodeView/CVDebugRecord.h" | #include "llvm/DebugInfo/CodeView/CVDebugRecord.h" | ||||
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" | #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" | ||||
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" | #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" | ||||
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" | |||||
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h" | #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" | ||||
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" | #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" | ||||
#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" | #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" | ||||
#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" | #include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" | ||||
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" | #include "llvm/DebugInfo/CodeView/TypeStreamMerger.h" | ||||
#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" | #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" | ||||
#include "llvm/DebugInfo/MSF/MSFBuilder.h" | #include "llvm/DebugInfo/MSF/MSFBuilder.h" | ||||
#include "llvm/DebugInfo/MSF/MSFCommon.h" | #include "llvm/DebugInfo/MSF/MSFCommon.h" | ||||
▲ Show 20 Lines • Show All 265 Lines • ▼ Show 20 Lines | if (TI.toArrayIndex() >= TypeIndexMap.size()) | ||||
return false; | return false; | ||||
TI = TypeIndexMap[TI.toArrayIndex()]; | TI = TypeIndexMap[TI.toArrayIndex()]; | ||||
return true; | return true; | ||||
} | } | ||||
static void remapTypesInSymbolRecord(ObjFile *File, | static void remapTypesInSymbolRecord(ObjFile *File, | ||||
MutableArrayRef<uint8_t> Contents, | MutableArrayRef<uint8_t> Contents, | ||||
const CVIndexMap &IndexMap, | const CVIndexMap &IndexMap, | ||||
const TypeTableBuilder &IDTable, | |||||
ArrayRef<TiReference> TypeRefs) { | ArrayRef<TiReference> TypeRefs) { | ||||
for (const TiReference &Ref : TypeRefs) { | for (const TiReference &Ref : TypeRefs) { | ||||
unsigned ByteSize = Ref.Count * sizeof(TypeIndex); | unsigned ByteSize = Ref.Count * sizeof(TypeIndex); | ||||
if (Contents.size() < Ref.Offset + ByteSize) | if (Contents.size() < Ref.Offset + ByteSize) | ||||
fatal("symbol record too short"); | fatal("symbol record too short"); | ||||
// This can be an item index or a type index. Choose the appropriate map. | // This can be an item index or a type index. Choose the appropriate map. | ||||
ArrayRef<TypeIndex> TypeOrItemMap = IndexMap.TPIMap; | ArrayRef<TypeIndex> TypeOrItemMap = IndexMap.TPIMap; | ||||
if (Ref.Kind == TiRefKind::IndexRef && IndexMap.IsTypeServerMap) | if (Ref.Kind == TiRefKind::IndexRef && IndexMap.IsTypeServerMap) | ||||
TypeOrItemMap = IndexMap.IPIMap; | TypeOrItemMap = IndexMap.IPIMap; | ||||
MutableArrayRef<TypeIndex> TIs( | MutableArrayRef<TypeIndex> TIs( | ||||
reinterpret_cast<TypeIndex *>(Contents.data() + Ref.Offset), Ref.Count); | reinterpret_cast<TypeIndex *>(Contents.data() + Ref.Offset), Ref.Count); | ||||
for (TypeIndex &TI : TIs) { | for (TypeIndex &TI : TIs) { | ||||
if (!remapTypeIndex(TI, TypeOrItemMap)) { | if (!remapTypeIndex(TI, TypeOrItemMap)) { | ||||
TI = TypeIndex(SimpleTypeKind::NotTranslated); | TI = TypeIndex(SimpleTypeKind::NotTranslated); | ||||
log("ignoring symbol record in " + File->getName() + | log("ignoring symbol record in " + File->getName() + | ||||
" with bad type index 0x" + utohexstr(TI.getIndex())); | " with bad type index 0x" + utohexstr(TI.getIndex())); | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
/// MSVC translates S_PROC_ID_END to S_END. | static SymbolKind symbolKind(ArrayRef<uint8_t> RecordData) { | ||||
uint16_t canonicalizeSymbolKind(SymbolKind Kind) { | const RecordPrefix *Prefix = | ||||
if (Kind == SymbolKind::S_PROC_ID_END) | reinterpret_cast<const RecordPrefix *>(RecordData.data()); | ||||
return static_cast<SymbolKind>(uint16_t(Prefix->RecordKind)); | |||||
} | |||||
/// MSVC translates S_PROC_ID_END to S_END, and S_[LG]PROC32_ID to S_[LG]PROC32 | |||||
static SymbolKind canonicalizeSymbolKind(MutableArrayRef<uint8_t> &RecordData, | |||||
rnk: This thing isn't really canonicalizing the symbol kind anymore, it's rewriting the symbol record. Maybe name it something like `rewriteSymbolForPDB`?
rnk: I still like `rewriteSymbolForPDB`.
const TypeTableBuilder &IDTable) { | |||||
RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(RecordData.data()); | |||||
SymbolKind Kind = symbolKind(RecordData); | |||||
if (Kind == SymbolKind::S_PROC_ID_END) { | |||||
Prefix->RecordKind = SymbolKind::S_END; | |||||
return SymbolKind::S_END; | return SymbolKind::S_END; | ||||
} | |||||
// In an object file, GPROC32_ID has an embedded reference which refers to the | |||||
// single object file type index namespace. We translated this to the PDB | |||||
// file's index namespace, and since it's an ID record that namespace is | |||||
// inside of the ID (IPI) stream. But we need to convert this to a TPI record | |||||
// so we remap again from ID namespace to type namespace. | |||||
if (Kind == SymbolKind::S_GPROC32_ID || Kind == SymbolKind::S_LPROC32_ID) { | |||||
SmallVector<TiReference, 1> Refs; | |||||
auto Content = RecordData.drop_front(sizeof(RecordPrefix)); | |||||
CVSymbol Sym(Kind, RecordData); | |||||
discoverTypeIndices(Sym, Refs); | |||||
assert(Refs.size() == 1); | |||||
TypeIndex *TI = | |||||
reinterpret_cast<TypeIndex *>(Content.data() + Refs[0].Offset); | |||||
// `TI` is the index of a FuncIdRecord which lives in the IPI stream, whose | |||||
// `FunctionType` member refers to the TPI stream. | |||||
if (!TI->isSimple() && !TI->isNoneType()) { | |||||
ArrayRef<uint8_t> FuncIdData = IDTable.records()[TI->toArrayIndex()]; | |||||
FuncIdRecord FID; | |||||
CVType FuncId(TypeLeafKind::LF_FUNC_ID, FuncIdData); | |||||
cantFail(TypeDeserializer::deserializeAs<FuncIdRecord>(FuncId, FID)); | |||||
rnk (unsubmitted): These can be `LF_MFUNC_ID` records; we need to handle that.
*TI = FID.FunctionType; | |||||
} | |||||
Kind = (Kind == SymbolKind::S_GPROC32_ID) ? SymbolKind::S_GPROC32 | |||||
: SymbolKind::S_LPROC32; | |||||
} | |||||
Prefix->RecordKind = uint16_t(Kind); | |||||
return Kind; | return Kind; | ||||
} | } | ||||
/// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned. | /// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned. | ||||
/// The object file may not be aligned. | /// The object file may not be aligned. | ||||
static MutableArrayRef<uint8_t> copySymbolForPdb(const CVSymbol &Sym, | static MutableArrayRef<uint8_t> copySymbolForPdb(const CVSymbol &Sym, | ||||
BumpPtrAllocator &Alloc) { | BumpPtrAllocator &Alloc) { | ||||
size_t Size = alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); | size_t Size = alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); | ||||
assert(Size >= 4 && "record too short"); | assert(Size >= 4 && "record too short"); | ||||
assert(Size <= MaxRecordLength && "record too long"); | assert(Size <= MaxRecordLength && "record too long"); | ||||
void *Mem = Alloc.Allocate(Size, 4); | void *Mem = Alloc.Allocate(Size, 4); | ||||
// Copy the symbol record and zero out any padding bytes. | // Copy the symbol record and zero out any padding bytes. | ||||
MutableArrayRef<uint8_t> NewData(reinterpret_cast<uint8_t *>(Mem), Size); | MutableArrayRef<uint8_t> NewData(reinterpret_cast<uint8_t *>(Mem), Size); | ||||
memcpy(NewData.data(), Sym.data().data(), Sym.length()); | memcpy(NewData.data(), Sym.data().data(), Sym.length()); | ||||
memset(NewData.data() + Sym.length(), 0, Size - Sym.length()); | memset(NewData.data() + Sym.length(), 0, Size - Sym.length()); | ||||
// Update the record prefix length. It should point to the beginning of the | // Update the record prefix length. It should point to the beginning of the | ||||
// next record. MSVC does some canonicalization of the record kind, so we do | // next record. | ||||
// that as well. | |||||
auto *Prefix = reinterpret_cast<RecordPrefix *>(Mem); | auto *Prefix = reinterpret_cast<RecordPrefix *>(Mem); | ||||
Prefix->RecordKind = canonicalizeSymbolKind(Sym.kind()); | |||||
Prefix->RecordLen = Size - 2; | Prefix->RecordLen = Size - 2; | ||||
rnk: We need an error check for an out-of-bounds IPI index.
zturner: That should have already happened when we merged the type streams. Wouldn't we have discarded any records that have invalid type index references, rather than emitting them with bad indices?
rnk: Yeah, that makes sense. Any invalid item index would've become the simple "not translated" type index.
return NewData; | return NewData; | ||||
} | } | ||||
/// Return true if this symbol opens a scope. This implies that the symbol has | /// Return true if this symbol opens a scope. This implies that the symbol has | ||||
rnk: In theory, we could do the same `discoverTypeIndices` trick that we do above to find the type index reference. Alternatively, if we know it's an `LF_FUNC_ID`, we should load the appropriate offset. Are these ever `LF_MFUNC_ID`s, though?
zturner: We could just load the appropriate offset, but that would require hardcoding the offset, which is a duplication of magic numbers that I'd like to avoid. I chose to do it this way because it makes the code more readable and is ultimately just copying some fields.
/// "parent" and "end" fields, which contain the offset of the S_END or | /// "parent" and "end" fields, which contain the offset of the S_END or | ||||
/// S_INLINESITE_END record. | /// S_INLINESITE_END record. | ||||
static bool symbolOpensScope(SymbolKind Kind) { | static bool symbolOpensScope(SymbolKind Kind) { | ||||
switch (Kind) { | switch (Kind) { | ||||
case SymbolKind::S_GPROC32: | case SymbolKind::S_GPROC32: | ||||
case SymbolKind::S_LPROC32: | case SymbolKind::S_LPROC32: | ||||
case SymbolKind::S_LPROC32_ID: | case SymbolKind::S_LPROC32_ID: | ||||
case SymbolKind::S_GPROC32_ID: | case SymbolKind::S_GPROC32_ID: | ||||
case SymbolKind::S_BLOCK32: | case SymbolKind::S_BLOCK32: | ||||
case SymbolKind::S_SEPCODE: | case SymbolKind::S_SEPCODE: | ||||
case SymbolKind::S_THUNK32: | case SymbolKind::S_THUNK32: | ||||
case SymbolKind::S_INLINESITE: | case SymbolKind::S_INLINESITE: | ||||
case SymbolKind::S_INLINESITE2: | case SymbolKind::S_INLINESITE2: | ||||
return true; | return true; | ||||
rnk: This would fail if it were an `LF_MFUNC_ID`.
default: | default: | ||||
break; | break; | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
static bool symbolEndsScope(SymbolKind Kind) { | static bool symbolEndsScope(SymbolKind Kind) { | ||||
switch (Kind) { | switch (Kind) { | ||||
Show All 35 Lines | if (Stack.empty()) { | ||||
return; | return; | ||||
} | } | ||||
SymbolScope S = Stack.pop_back_val(); | SymbolScope S = Stack.pop_back_val(); | ||||
S.OpeningRecord->PtrEnd = CurOffset; | S.OpeningRecord->PtrEnd = CurOffset; | ||||
} | } | ||||
static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File, | static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File, | ||||
const CVIndexMap &IndexMap, | const CVIndexMap &IndexMap, | ||||
const TypeTableBuilder &IDTable, | |||||
BinaryStreamRef SymData) { | BinaryStreamRef SymData) { | ||||
// FIXME: Improve error recovery by warning and skipping records when | // FIXME: Improve error recovery by warning and skipping records when | ||||
// possible. | // possible. | ||||
CVSymbolArray Syms; | CVSymbolArray Syms; | ||||
BinaryStreamReader Reader(SymData); | BinaryStreamReader Reader(SymData); | ||||
ExitOnErr(Reader.readArray(Syms, Reader.getLength())); | ExitOnErr(Reader.readArray(Syms, Reader.getLength())); | ||||
SmallVector<SymbolScope, 4> Scopes; | SmallVector<SymbolScope, 4> Scopes; | ||||
for (const CVSymbol &Sym : Syms) { | for (CVSymbol Sym : Syms) { | ||||
// Discover type index references in the record. Skip it if we don't know | // Discover type index references in the record. Skip it if we don't know | ||||
// where they are. | // where they are. | ||||
SmallVector<TiReference, 32> TypeRefs; | SmallVector<TiReference, 32> TypeRefs; | ||||
if (!discoverTypeIndices(Sym, TypeRefs)) { | if (!discoverTypeIndices(Sym, TypeRefs)) { | ||||
log("ignoring unknown symbol record with kind 0x" + utohexstr(Sym.kind())); | log("ignoring unknown symbol record with kind 0x" + utohexstr(Sym.kind())); | ||||
continue; | continue; | ||||
} | } | ||||
// Copy the symbol record so we can mutate it. | // Copy the symbol record so we can mutate it. | ||||
MutableArrayRef<uint8_t> NewData = copySymbolForPdb(Sym, Alloc); | MutableArrayRef<uint8_t> NewData = copySymbolForPdb(Sym, Alloc); | ||||
// Re-map all the type index references. | // Re-map all the type index references. | ||||
MutableArrayRef<uint8_t> Contents = | MutableArrayRef<uint8_t> Contents = | ||||
NewData.drop_front(sizeof(RecordPrefix)); | NewData.drop_front(sizeof(RecordPrefix)); | ||||
remapTypesInSymbolRecord(File, Contents, IndexMap, TypeRefs); | remapTypesInSymbolRecord(File, Contents, IndexMap, IDTable, TypeRefs); | ||||
SymbolKind NewKind = canonicalizeSymbolKind(NewData, IDTable); | |||||
rnk: Let's keep this as part of `copySymbolForPdb`.
zturner: I actually think it should be here. I tried moving it into `copySymbolForPdb` and the code becomes really ugly. You end up having to duplicate a lot of the logic from `remapTypesInSymbolRecord` but forcing it to remap into the TPI stream's index space. By doing it this way, each step is logically independent: copy symbol literally just copies some memory like the comment says, remap types does exactly what it says, and so does canonicalize. I can call it `remapSymbolKind` or something like that if you prefer, but I kind of like the idea of keeping it as a sequence of 3 independent steps like this.
rnk: Right, I agree, it should happen after remapping. Let's call it something else, though. We're doing a lot more than canonicalizing the symbol kind. Ultimately, we're going to need logic here for filtering out `S_UDT` records.
assert(NewKind == symbolKind(NewData)); | |||||
rnk (unsubmitted): Rather than having this assert, let's remove the return type from `canonicalizeSymbolKind` and do `SymbolKind NewKind = symbolKind(NewData);`. It's one load.
assert(NewKind != SymbolKind::S_GPROC32_ID); | |||||
assert(NewKind != SymbolKind::S_LPROC32_ID); | |||||
assert(NewKind != SymbolKind::S_PROC_ID_END); | |||||
rnk (unsubmitted): These `!=` assertions are trivial, IMO. They also don't represent internal errors; we emitted those records for a while, and cvdump still accepted our PDBs.
// Fill in "Parent" and "End" fields by maintaining a stack of scopes. | // Fill in "Parent" and "End" fields by maintaining a stack of scopes. | ||||
CVSymbol NewSym(Sym.kind(), NewData); | CVSymbol NewSym(NewKind, NewData); | ||||
if (symbolOpensScope(Sym.kind())) | if (symbolOpensScope(NewKind)) | ||||
scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), NewSym); | scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), NewSym); | ||||
else if (symbolEndsScope(Sym.kind())) | else if (symbolEndsScope(NewKind)) | ||||
scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File); | scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File); | ||||
// Add the symbol to the module. | // Add the symbol to the module. | ||||
File->ModuleDBI->addSymbol(NewSym); | File->ModuleDBI->addSymbol(NewSym); | ||||
} | } | ||||
} | } | ||||
// Allocate memory for a .debug$S section and relocate it. | // Allocate memory for a .debug$S section and relocate it. | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | for (const DebugSubsectionRecord &SS : Subsections) { | ||||
ExitOnErr(Checksums.initialize(SS.getRecordData())); | ExitOnErr(Checksums.initialize(SS.getRecordData())); | ||||
break; | break; | ||||
case DebugSubsectionKind::Lines: | case DebugSubsectionKind::Lines: | ||||
// We can add the relocated line table directly to the PDB without | // We can add the relocated line table directly to the PDB without | ||||
// modification because the file checksum offsets will stay the same. | // modification because the file checksum offsets will stay the same. | ||||
File->ModuleDBI->addDebugSubsection(SS); | File->ModuleDBI->addDebugSubsection(SS); | ||||
break; | break; | ||||
case DebugSubsectionKind::Symbols: | case DebugSubsectionKind::Symbols: | ||||
mergeSymbolRecords(Alloc, File, IndexMap, SS.getRecordData()); | mergeSymbolRecords(Alloc, File, IndexMap, IDTable, SS.getRecordData()); | ||||
break; | break; | ||||
default: | default: | ||||
// FIXME: Process the rest of the subsections. | // FIXME: Process the rest of the subsections. | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if (Checksums.valid()) { | if (Checksums.valid()) { | ||||
▲ Show 20 Lines • Show All 214 Lines • Show Last 20 Lines |
This thing isn't really canonicalizing the symbol kind anymore, it's rewriting the symbol record. Maybe name it something like rewriteSymbolForPDB?