Index: lld/COFF/PDB.cpp =================================================================== --- lld/COFF/PDB.cpp +++ lld/COFF/PDB.cpp @@ -760,9 +760,11 @@ } static void remapTypesInSymbolRecord(ObjFile *File, SymbolKind SymKind, - MutableArrayRef Contents, + MutableArrayRef RecordBytes, const CVIndexMap &IndexMap, ArrayRef TypeRefs) { + MutableArrayRef Contents = + RecordBytes.drop_front(sizeof(RecordPrefix)); for (const TiReference &Ref : TypeRefs) { unsigned ByteSize = Ref.Count * sizeof(TypeIndex); if (Contents.size() < Ref.Offset + ByteSize) @@ -808,7 +810,7 @@ switch (Kind) { case SymbolKind::S_FILESTATIC: // FileStaticSym::ModFileOffset - recordStringTableReferenceAtOffset(Contents, 4, StrTableRefs); + recordStringTableReferenceAtOffset(Contents, 8, StrTableRefs); break; case SymbolKind::S_DEFRANGE: case SymbolKind::S_DEFRANGE_SUBFIELD: @@ -873,21 +875,22 @@ /// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned. /// The object file may not be aligned. -static MutableArrayRef copySymbolForPdb(const CVSymbol &Sym, - BumpPtrAllocator &Alloc) { +static MutableArrayRef +copyAndAlignSymbol(const CVSymbol &Sym, MutableArrayRef &AlignedMem) { size_t Size = alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); assert(Size >= 4 && "record too short"); assert(Size <= MaxRecordLength && "record too long"); - void *Mem = Alloc.Allocate(Size, 4); + assert(AlignedMem.size() >= Size && "didn't preallocate enough"); // Copy the symbol record and zero out any padding bytes. - MutableArrayRef NewData(reinterpret_cast(Mem), Size); + MutableArrayRef NewData = AlignedMem.take_front(Size); + AlignedMem = AlignedMem.drop_front(Size); memcpy(NewData.data(), Sym.data().data(), Sym.length()); memset(NewData.data() + Sym.length(), 0, Size - Sym.length()); // Update the record prefix length. It should point to the beginning of the // next record. - auto *Prefix = reinterpret_cast(Mem); + auto *Prefix = reinterpret_cast(NewData.data()); Prefix->RecordLen = Size - 2; return NewData; } @@ -1001,8 +1004,8 @@ } } -static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, ObjFile &File, - const CVSymbol &Sym) { +static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, unsigned ModIndex, + unsigned SymOffset, const CVSymbol &Sym) { switch (Sym.kind()) { case SymbolKind::S_CONSTANT: case SymbolKind::S_UDT: @@ -1018,12 +1021,12 @@ if (Sym.kind() == SymbolKind::S_LPROC32) K = SymbolRecordKind::LocalProcRef; ProcRefSym PS(K); - PS.Module = static_cast(File.ModuleDBI->getModuleIndex()); + PS.Module = static_cast(ModIndex); // For some reason, MSVC seems to add one to this value. ++PS.Module; PS.Name = getSymbolName(Sym); PS.SumName = 0; - PS.SymOffset = File.ModuleDBI->getNextSymbolOffset(); + PS.SymOffset = SymOffset; Builder.addGlobalSymbol(PS); break; } @@ -1039,8 +1042,53 @@ cantFail(SymData.readBytes(0, SymData.getLength(), SymsBuffer)); SmallVector Scopes; + // Iterate every symbol to check if any need to be realigned, and if so, how + // much space we need to allocate for them. + bool NeedsRealignment = false; + unsigned RealignedSize = 0; auto EC = forEachCodeViewRecord( SymsBuffer, [&](CVSymbol Sym) -> llvm::Error { + if (Sym.length() % alignOf(CodeViewContainer::Pdb) != 0) + NeedsRealignment = true; + RealignedSize += alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); + return Error::success(); + }); + + // If any of the symbol record lengths was corrupt, ignore them all, warn + // about it, and move on. + if (EC) { + warn("corrupt symbol records in " + File->getName()); + consumeError(std::move(EC)); + return; + } + + // If any symbol needed realignment, allocate enough contiguous memory for + // them all. Typically symbol subsections are small enough that this will not + // cause fragmentation. + MutableArrayRef AlignedSymbolMem; + if (NeedsRealignment) { + void *AlignedData = Alloc.Allocate(RealignedSize, 4); + AlignedSymbolMem = makeMutableArrayRef( + reinterpret_cast(AlignedData), RealignedSize); + } + + // Iterate again, this time doing the real work. + unsigned CurSymOffset = File->ModuleDBI->getNextSymbolOffset(); + ArrayRef BulkSymbols; + cantFail(forEachCodeViewRecord( + SymsBuffer, [&](CVSymbol Sym) -> llvm::Error { + // Align the record if required. + MutableArrayRef RecordBytes; + if (NeedsRealignment) { + RecordBytes = copyAndAlignSymbol(Sym, AlignedSymbolMem); + Sym = CVSymbol(Sym.kind(), RecordBytes); + } else { + // Otherwise, we can actually mutate the symbol directly, since we + // copied it to apply relocations. + RecordBytes = makeMutableArrayRef( + const_cast(Sym.data().data()), Sym.length()); + } + // Discover type index references in the record. Skip it if we don't // know where they are. SmallVector TypeRefs; @@ -1050,45 +1098,51 @@ return Error::success(); } - // Copy the symbol and fix the symbol record alignment. The symbol - // record in the object file may not be aligned. - MutableArrayRef NewData = copySymbolForPdb(Sym, Alloc); - Sym = CVSymbol(Sym.kind(), NewData); - // Re-map all the type index references. - MutableArrayRef Contents = - NewData.drop_front(sizeof(RecordPrefix)); - remapTypesInSymbolRecord(File, Sym.kind(), Contents, IndexMap, + remapTypesInSymbolRecord(File, Sym.kind(), RecordBytes, IndexMap, TypeRefs); // An object file may have S_xxx_ID symbols, but these get converted to // "real" symbols in a PDB. - translateIdSymbols(NewData, getIDTable()); - Sym = CVSymbol(symbolKind(NewData), NewData); + translateIdSymbols(RecordBytes, getIDTable()); + Sym = CVSymbol(symbolKind(RecordBytes), RecordBytes); // If this record refers to an offset in the object file's string table, // add that item to the global PDB string table and re-write the index. - recordStringTableReferences(Sym.kind(), Contents, StringTableRefs); + recordStringTableReferences(Sym.kind(), RecordBytes, StringTableRefs); // Fill in "Parent" and "End" fields by maintaining a stack of scopes. if (symbolOpensScope(Sym.kind())) - scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), Sym); + scopeStackOpen(Scopes, CurSymOffset, Sym); else if (symbolEndsScope(Sym.kind())) - scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File); + scopeStackClose(Scopes, CurSymOffset, File); // Add the symbol to the globals stream if necessary. Do this before // adding the symbol to the module since we may need to get the next // symbol offset, and writing to the module's symbol stream will update // that offset. if (symbolGoesInGlobalsStream(Sym)) - addGlobalSymbol(Builder.getGsiBuilder(), *File, Sym); - - // Add the symbol to the module. - if (symbolGoesInModuleStream(Sym)) - File->ModuleDBI->addSymbol(Sym); + addGlobalSymbol(Builder.getGsiBuilder(), + File->ModuleDBI->getModuleIndex(), CurSymOffset, Sym); + + if (symbolGoesInModuleStream(Sym)) { + // Add symbols to the module in bulk. If this symbol is contiguous + // with the previous run of symbols to add, combine the ranges. If + // not, close the previous range of symbols and start a new one. + if (Sym.data().data() == BulkSymbols.end()) { + BulkSymbols = makeArrayRef(BulkSymbols.data(), + BulkSymbols.size() + Sym.length()); + } else { + File->ModuleDBI->addSymbolsInBulk(BulkSymbols); + BulkSymbols = RecordBytes; + } + CurSymOffset += Sym.length(); + } return Error::success(); - }); - cantFail(std::move(EC)); + })); + + if (!BulkSymbols.empty()) + File->ModuleDBI->addSymbolsInBulk(BulkSymbols); } // Allocate memory for a .debug$S / .debug$F section and relocate it. Index: llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -51,6 +51,7 @@ void setObjFileName(StringRef Name); void setFirstSectionContrib(const SectionContrib &SC); void addSymbol(codeview::CVSymbol Symbol); + void addSymbolsInBulk(ArrayRef BulkSymbols); void addDebugSubsection(std::shared_ptr Subsection); @@ -91,7 +92,7 @@ std::string ModuleName; std::string ObjFileName; std::vector SourceFiles; - std::vector Symbols; + std::vector> Symbols; std::vector> C13Builders; Index: llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -19,7 +19,6 @@ #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; @@ -66,12 +65,18 @@ } void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { - Symbols.push_back(Symbol); - // Symbols written to a PDB file are required to be 4 byte aligned. The same + // Defer to the bulk API. It does the same thing. + addSymbolsInBulk(Symbol.data()); +} + +void DbiModuleDescriptorBuilder::addSymbolsInBulk( + ArrayRef BulkSymbols) { + Symbols.push_back(BulkSymbols); + // Symbols written to a PDB file are required to be 4 byte aligned. The same // is not true of object files. - assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 && + assert(BulkSymbols.size() % alignOf(CodeViewContainer::Pdb) == 0 && "Invalid Symbol alignment!"); - SymbolByteSize += Symbol.length(); + SymbolByteSize += BulkSymbols.size(); } void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { @@ -145,16 +150,11 @@ if (auto EC = SymbolWriter.writeInteger(COFF::DEBUG_SECTION_MAGIC)) return EC; - BinaryItemStream Records(llvm::support::endianness::little); - Records.setItems(Symbols); - BinaryStreamRef RecordsRef(Records); - if (auto EC = SymbolWriter.writeStreamRef(RecordsRef)) - return EC; - if (auto EC = SymbolWriter.padToAlignment(4)) - return EC; - // TODO: Write C11 Line data + for (ArrayRef Syms : Symbols) + SymbolWriter.writeBytes(Syms); assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 && "Invalid debug section alignment!"); + // TODO: Write C11 Line data for (const auto &Builder : C13Builders) { assert(Builder && "Empty C13 Fragment Builder!"); if (auto EC = Builder->commit(SymbolWriter))