Index: lld/COFF/PDB.cpp =================================================================== --- lld/COFF/PDB.cpp +++ lld/COFF/PDB.cpp @@ -157,7 +157,7 @@ fatal(EC, "StreamReader.readArray failed"); TypeDatabase TDB(0); - CVSymbolDumper SymbolDumper(W, TDB, nullptr, false); + CVSymbolDumper SymbolDumper(W, TDB, 1, nullptr, false); if (auto EC = SymbolDumper.dump(Symbols)) fatal(EC, "CVSymbolDumper::dump failed"); } Index: llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -136,6 +136,7 @@ Error mapByteVectorTail(ArrayRef &Bytes); Error mapByteVectorTail(std::vector &Bytes); + Error padToAlignment(uint32_t Align); Error skipPadding(); private: Index: llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h +++ llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h @@ -24,9 +24,9 @@ class SymbolVisitorDelegate; class SymbolDeserializer : public SymbolVisitorCallbacks { struct MappingInfo { - explicit MappingInfo(ArrayRef RecordData) + MappingInfo(ArrayRef RecordData, uint32_t Align) : Stream(RecordData, llvm::support::little), Reader(Stream), - Mapping(Reader) {} + Mapping(Reader, Align) {} BinaryByteStream Stream; BinaryStreamReader Reader; @@ -35,7 +35,9 @@ public: template static Error deserializeAs(CVSymbol Symbol, T &Record) { - SymbolDeserializer S(nullptr); + // If we're just deserializing one record, then don't worry about alignment + // as there's nothing that comes after. + SymbolDeserializer S(nullptr, 1); if (auto EC = S.visitSymbolBegin(Symbol)) return EC; if (auto EC = S.visitKnownRecord(Symbol, Record)) @@ -45,12 +47,12 @@ return Error::success(); } - explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate) - : Delegate(Delegate) {} + explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate, uint32_t Align) + : Delegate(Delegate), Align(Align) {} Error visitSymbolBegin(CVSymbol &Record) override { assert(!Mapping && "Already in a symbol mapping!"); - Mapping = llvm::make_unique(Record.content()); + Mapping = llvm::make_unique(Record.content(), Align); return Mapping->Mapping.visitSymbolBegin(Record); } Error visitSymbolEnd(CVSymbol &Record) override { @@ -77,6 +79,7 @@ return Error::success(); } + uint32_t Align; SymbolVisitorDelegate *Delegate; std::unique_ptr Mapping; }; Index: llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -25,10 +25,10 @@ /// Dumper for CodeView symbol streams found in COFF object files and PDB files. class CVSymbolDumper { public: - CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types, + CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types, uint32_t Align, std::unique_ptr ObjDelegate, bool PrintRecordBytes) - : W(W), Types(Types), ObjDelegate(std::move(ObjDelegate)), + : W(W), Types(Types), Align(Align), ObjDelegate(std::move(ObjDelegate)), PrintRecordBytes(PrintRecordBytes) {} /// Dumps one type record. Returns false if there was a type parsing error, @@ -44,6 +44,7 @@ private: ScopedPrinter &W; TypeCollection &Types; + uint32_t Align; std::unique_ptr ObjDelegate; bool PrintRecordBytes; Index: llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h +++ llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h @@ -20,8 +20,10 @@ namespace codeview { class SymbolRecordMapping : public SymbolVisitorCallbacks { public: - explicit SymbolRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {} - explicit SymbolRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {} + explicit SymbolRecordMapping(BinaryStreamReader &Reader, uint32_t Align) + : IO(Reader), Align(Align) {} + explicit SymbolRecordMapping(BinaryStreamWriter &Writer, uint32_t Align) + : IO(Writer), Align(Align) {} Error visitSymbolBegin(CVSymbol &Record) override; Error visitSymbolEnd(CVSymbol &Record) override; @@ -34,6 +36,7 @@ private: Optional Kind; + uint32_t Align = 0; CodeViewRecordIO IO; }; } Index: llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h +++ llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h @@ -46,17 +46,18 @@ public: template - static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage) { + static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage, + uint32_t Align) { CVSymbol Result; Result.Type = static_cast(Sym.Kind); - SymbolSerializer Serializer(Storage); + SymbolSerializer Serializer(Storage, Align); consumeError(Serializer.visitSymbolBegin(Result)); consumeError(Serializer.visitKnownRecord(Result, Sym)); consumeError(Serializer.visitSymbolEnd(Result)); return Result; } - explicit SymbolSerializer(BumpPtrAllocator &Storage); + SymbolSerializer(BumpPtrAllocator &Storage, uint32_t Align); virtual Error visitSymbolBegin(CVSymbol &Record) override; virtual Error visitSymbolEnd(CVSymbol &Record) override; Index: llvm/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h =================================================================== --- llvm/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h +++ llvm/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h @@ -28,7 +28,8 @@ struct SymbolRecord { std::shared_ptr Symbol; - codeview::CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator) const; + codeview::CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator, + uint32_t Align) const; static Expected fromCodeViewSymbol(codeview::CVSymbol Symbol); }; Index: llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -27,6 +27,31 @@ Error CodeViewRecordIO::endRecord() { assert(!Limits.empty() && "Not in a record!"); Limits.pop_back(); + if (isReading()) { + // We would like to assert that we actually read all the bytes that we + // expected to read for this record, but unfortunately we can't do this + // because some CodeView producers generate technically "invalid" records. + // This has been observed with MASM, although it may happen in other + // situations too. Some types and symbols have fields that are only + // supposed to be written if certain conditions are true. For example, + // an LF_POINTER type record will write two additional fields if its + // attribute field specifies that it is a pointer to member, so a pointer + // to member record will be 8 bytes longer than a regular pointer record. + // But MASM will write those two additional fields no matter what, while + // other producers won't. As a result, we can't reliably predict what + // length the record *should* be and we have to just trust that the length + // field in the header is correct. + // assert(!Limits.empty() || + // Reader->bytesRemaining() == 0 && "Did not read entire record!"); + } else { + // When we're writing, we probably allocated a buffer that is the maximum + // possible size since we didn't know how large the record would be in + // advance. So we can't rely on the writer being at the end and for that + // reason we still can't assert that we've written every byte we expected + // to write. + // assert(!Limits.empty() || + // Writer->bytesRemaining() == 0 && "Did not read entire record!"); + } return Error::success(); } @@ -49,6 +74,12 @@ return *Min; } +Error CodeViewRecordIO::padToAlignment(uint32_t Align) { + if (isReading()) + return Reader->padToAlignment(Align); + return Writer->padToAlignment(Align); +} + Error CodeViewRecordIO::skipPadding() { assert(!isWriting() && "Cannot skip padding while writing!"); Index: llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -668,7 +668,7 @@ Error CVSymbolDumper::dump(CVRecord &Record) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); + SymbolDeserializer Deserializer(ObjDelegate.get(), Align); CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); @@ -679,7 +679,7 @@ Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) { SymbolVisitorCallbackPipeline Pipeline; - SymbolDeserializer Deserializer(ObjDelegate.get()); + SymbolDeserializer Deserializer(ObjDelegate.get(), Align); CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes); Pipeline.addCallbackToPipeline(Deserializer); Index: llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp +++ llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp @@ -40,6 +40,7 @@ } Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) { + error(IO.padToAlignment(Align)); error(IO.endRecord()); return Error::success(); } Index: llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp +++ llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp @@ -12,9 +12,10 @@ using namespace llvm; using namespace llvm::codeview; -SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator) - : Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little), - Writer(Stream), Mapping(Writer) { } +SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator, uint32_t Align) + : Storage(Allocator), RecordBuffer(MaxRecordLength), + Stream(RecordBuffer, llvm::support::little), Writer(Stream), + Mapping(Writer, Align) {} Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) { assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!"); Index: llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -66,7 +66,10 @@ void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { Symbols.push_back(Symbol); - SymbolByteSize += Symbol.data().size(); + // Symbols written to a PDB file are required to be 4 byte aligned. The same + // is not true of object files. + assert(Symbol.length() % 4 == 0); + SymbolByteSize += Symbol.length(); } void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { Index: llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp =================================================================== --- llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp +++ llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp @@ -147,8 +147,8 @@ virtual ~SymbolRecordBase() {} virtual void map(yaml::IO &io) = 0; - virtual codeview::CVSymbol - toCodeViewSymbol(BumpPtrAllocator &Allocator) const = 0; + virtual codeview::CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator, + uint32_t Align) const = 0; virtual Error fromCodeViewSymbol(codeview::CVSymbol Type) = 0; }; @@ -158,9 +158,9 @@ void map(yaml::IO &io) override; - codeview::CVSymbol - toCodeViewSymbol(BumpPtrAllocator &Allocator) const override { - return SymbolSerializer::writeOneSymbol(Symbol, Allocator); + codeview::CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator, + uint32_t Align) const override { + return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Align); } Error fromCodeViewSymbol(codeview::CVSymbol CVS) override { return SymbolDeserializer::deserializeAs(CVS, Symbol); @@ -428,9 +428,10 @@ } } -CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol( - BumpPtrAllocator &Allocator) const { - return Symbol->toCodeViewSymbol(Allocator); +CVSymbol +CodeViewYAML::SymbolRecord::toCodeViewSymbol(BumpPtrAllocator &Allocator, + uint32_t Align) const { + return Symbol->toCodeViewSymbol(Allocator, Align); } namespace llvm { Index: llvm/test/DebugInfo/PDB/pdbdump-write.test =================================================================== --- llvm/test/DebugInfo/PDB/pdbdump-write.test +++ llvm/test/DebugInfo/PDB/pdbdump-write.test @@ -11,10 +11,10 @@ ; (for example if we don't write the entire stream) ; ; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory \ -; RUN: -pdb-stream -tpi-stream %p/Inputs/empty.pdb > %t.1 +; RUN: -pdb-stream -tpi-stream -dbi-module-syms %p/Inputs/empty.pdb > %t.1 ; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1 ; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \ -; RUN: -no-file-headers %p/Inputs/empty.pdb > %t.3 +; RUN: -dbi-module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3 ; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \ -; RUN: -no-file-headers %t.2 > %t.4 +; RUN: -dbi-module-syms -no-file-headers %t.2 > %t.4 ; RUN: diff %t.3 %t.4 Index: llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp =================================================================== --- llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp +++ llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp @@ -804,7 +804,8 @@ auto &Types = *ExpectedTypes; ListScope SS(P, "Symbols"); - codeview::CVSymbolDumper SD(P, Types, nullptr, false); + // Symbols in PDB files are 4 byte aligned. + codeview::CVSymbolDumper SD(P, Types, 4, nullptr, false); bool HadError = false; for (auto S : ModS.symbols(&HadError)) { DictScope LL(P, ""); @@ -952,7 +953,8 @@ return ExpectedTypes.takeError(); auto &Tpi = *ExpectedTypes; - codeview::CVSymbolDumper SD(P, Tpi, nullptr, false); + // Symbols in PDB files are 4 byte aligned. + codeview::CVSymbolDumper SD(P, Tpi, 4, nullptr, false); bool HadError = false; for (auto S : Publics->getSymbols(&HadError)) { DictScope DD(P, ""); Index: llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp =================================================================== --- llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -535,8 +535,10 @@ ExitOnErr(DbiBuilder.addModuleSourceFile(MI.Mod, S)); if (MI.Modi.hasValue()) { const auto &ModiStream = *MI.Modi; - for (auto Symbol : ModiStream.Symbols) - ModiBuilder.addSymbol(Symbol.toCodeViewSymbol(Allocator)); + for (auto Symbol : ModiStream.Symbols) { + // Symbol records in PDBs are padded to 4 bytes. + ModiBuilder.addSymbol(Symbol.toCodeViewSymbol(Allocator, 4)); + } } if (MI.FileLineInfo.hasValue()) { const auto &FLI = *MI.FileLineInfo; Index: llvm/tools/llvm-readobj/COFFDumper.cpp =================================================================== --- llvm/tools/llvm-readobj/COFFDumper.cpp +++ llvm/tools/llvm-readobj/COFFDumper.cpp @@ -978,7 +978,9 @@ Subsection.bytes_end()); auto CODD = llvm::make_unique(*this, Section, Obj, SectionContents); - CVSymbolDumper CVSD(W, Types, std::move(CODD), opts::CodeViewSubsectionBytes); + // Symbols in object files are 1-byte aligned. + CVSymbolDumper CVSD(W, Types, 1, std::move(CODD), + opts::CodeViewSubsectionBytes); CVSymbolArray Symbols; BinaryStreamReader Reader(BinaryData, llvm::support::little); if (auto EC = Reader.readArray(Symbols, Reader.getLength())) {