diff --git a/lld/test/wasm/Inputs/merge-string-debug2.s b/lld/test/wasm/Inputs/merge-string-debug2.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/merge-string-debug2.s @@ -0,0 +1,4 @@ +.section .debug_str,"S",@ + .asciz "clang version 13.0.0" + .asciz "bar" + .asciz "foo" diff --git a/lld/test/wasm/merge-string-debug.s b/lld/test/wasm/merge-string-debug.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/merge-string-debug.s @@ -0,0 +1,22 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/merge-string-debug2.s -o %t2.o + +# RUN: wasm-ld %t.o %t2.o -o %t.wasm --no-entry +# RUN: llvm-readobj -x .debug_str %t.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O1 + +# RUN: wasm-ld -O0 %t.o %t2.o -o %tO0.wasm --no-entry +# RUN: llvm-readobj -x .debug_str %tO0.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O0 + +.section .debug_str,"S",@ + .asciz "clang version 13.0.0" + .asciz "foobar" + +# CHECK: Hex dump of section '.debug_str': + +# CHECK-O0: 0x00000000 636c616e 67207665 7273696f 6e203133 clang version 13 +# CHECK-O0: 0x00000010 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan +# CHECK-O0: 0x00000020 67207665 7273696f 6e203133 2e302e30 g version 13.0.0 +# CHECK-O0: 0x00000030 00626172 00666f6f 00 .bar.foo. + +# CHECK-O1: 0x00000000 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang +# CHECK-O1: 0x00000010 20766572 73696f6e 2031332e 302e3000 version 13.0.0. diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -796,7 +796,7 @@ } static void splitSections() { - // splitIntoPieces needs to be called on each MergeInputSection + // splitIntoPieces needs to be called on each MergeInputChunk // before calling finalizeContents(). LLVM_DEBUG(llvm::dbgs() << "splitSections\n"); parallelForEach(symtab->objectFiles, [](ObjFile *file) { @@ -804,6 +804,10 @@ if (auto *s = dyn_cast(seg)) s->splitIntoPieces(); } + for (InputChunk *sec : file->customSections) { + if (auto *s = dyn_cast(sec)) + s->splitIntoPieces(); + } }); } diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -43,7 +43,7 @@ MergedSegment, Function, SyntheticFunction, - Section + Section, }; StringRef name; @@ -67,7 +67,7 @@ uint64_t getOffset(uint64_t offset) const; // For data segments translate and offset into the input segment into // and offset into the output segment - uint64_t getSegmentOffset(uint64_t offset) const; + uint64_t getChunkOffset(uint64_t offset) const; uint64_t getVA(uint64_t offset = 0) const; uint32_t getComdat() const { return comdat; } @@ -133,22 +133,24 @@ // each global variable. class InputSegment : public InputChunk { public: - InputSegment(const WasmSegment *seg, ObjFile *f) - : InputChunk(f, InputChunk::DataSegment, seg->Data.Name, - seg->Data.Alignment, seg->Data.LinkingFlags), + InputSegment(const WasmSegment &seg, ObjFile *f) + : InputChunk(f, InputChunk::DataSegment, seg.Data.Name, + seg.Data.Alignment, seg.Data.LinkingFlags), segment(seg) { - rawData = segment->Data.Content; - comdat = segment->Data.Comdat; - inputSectionOffset = segment->SectionOffset; + rawData = segment.Data.Content; + comdat = segment.Data.Comdat; + inputSectionOffset = segment.SectionOffset; } + /* InputSegment(StringRef name, uint32_t alignment, uint32_t flags) : InputChunk(nullptr, InputChunk::DataSegment, name, alignment, flags) {} + */ static bool classof(const InputChunk *c) { return c->kind() == DataSegment; } protected: - const WasmSegment *segment = nullptr; + const WasmSegment &segment; }; class SyntheticMergedChunk; @@ -175,12 +177,19 @@ // This corresponds segments marked as WASM_SEG_FLAG_STRINGS. class MergeInputChunk : public InputChunk { public: - MergeInputChunk(const WasmSegment *seg, ObjFile *f) - : InputChunk(f, Merge, seg->Data.Name, seg->Data.Alignment, - seg->Data.LinkingFlags) { - rawData = seg->Data.Content; - comdat = seg->Data.Comdat; - inputSectionOffset = seg->SectionOffset; + MergeInputChunk(const WasmSegment &seg, ObjFile *f) + : InputChunk(f, Merge, seg.Data.Name, seg.Data.Alignment, + seg.Data.LinkingFlags) { + rawData = seg.Data.Content; + comdat = seg.Data.Comdat; + inputSectionOffset = seg.SectionOffset; + } + + MergeInputChunk(const WasmSection &s, ObjFile *f) + : InputChunk(f, Merge, s.Name, 0, llvm::wasm::WASM_SEG_FLAG_STRINGS) { + assert(s.Type == llvm::wasm::WASM_SEC_CUSTOM); + comdat = s.Comdat; + rawData = s.Content; } static bool classof(const InputChunk *s) { return s->kind() == Merge; } diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -314,24 +314,24 @@ LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n"); } -uint64_t InputChunk::getOffset(uint64_t offset) const { - return outSecOff + offset; -} - -uint64_t InputChunk::getSegmentOffset(uint64_t offset) const { +uint64_t InputChunk::getChunkOffset(uint64_t offset) const { if (const auto *ms = dyn_cast(this)) { LLVM_DEBUG(dbgs() << "getSegmentOffset(merged): " << getName() << "\n"); LLVM_DEBUG(dbgs() << "offset: " << offset << "\n"); LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset) << "\n"); assert(ms->parent); - return ms->parent->getSegmentOffset(ms->getParentOffset(offset)); + return ms->parent->getChunkOffset(ms->getParentOffset(offset)); } return outputSegmentOffset + offset; } +uint64_t InputChunk::getOffset(uint64_t offset) const { + return outSecOff + getChunkOffset(offset); +} + uint64_t InputChunk::getVA(uint64_t offset) const { - return (outputSeg ? outputSeg->startVA : 0) + getSegmentOffset(offset); + return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset); } // Generate code to apply relocations to the data section at runtime. diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -141,8 +141,8 @@ std::vector globals; std::vector events; std::vector tables; - std::vector customSections; - llvm::DenseMap customSectionsByIndex; + std::vector customSections; + llvm::DenseMap customSectionsByIndex; Symbol *getSymbol(uint32_t index) const { return symbols[index]; } FunctionSymbol *getFunctionSymbol(uint32_t index) const; diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -360,6 +360,14 @@ config->legacyFunctionTable = true; } +static bool shouldMerge(const WasmSection &sec) { + if (config->optimize == 0) + return false; + // Sadly we don't have section attributes yet for custom sections, so we + // have to go on the name alone. + return sec.Name.startswith(".debug_str"); +} + static bool shouldMerge(const WasmSegment &seg) { // As of now we only support merging strings, and only with single byte // alignment (2^0). @@ -445,7 +453,11 @@ assert(!dataSection); dataSection = §ion; } else if (section.Type == WASM_SEC_CUSTOM) { - auto *customSec = make(section, this); + InputChunk *customSec; + if (shouldMerge(section)) + customSec = make(section, this); + else + customSec = make(section, this); customSec->discarded = isExcludedByComdat(customSec); customSections.emplace_back(customSec); customSections.back()->setRelocations(section.Relocations); @@ -466,9 +478,9 @@ for (const WasmSegment &s : wasmObj->dataSegments()) { InputChunk *seg; if (shouldMerge(s)) { - seg = make(&s, this); + seg = make(s, this); } else - seg = make(&s, this); + seg = make(s, this); seg->discarded = isExcludedByComdat(seg); segments.emplace_back(seg); @@ -585,7 +597,7 @@ return symtab->addDefinedGlobal(name, flags, this, global); } case WASM_SYMBOL_TYPE_SECTION: { - InputSection *section = customSectionsByIndex[sym.Info.ElementIndex]; + InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex]; assert(sym.isBindingLocal()); // Need to return null if discarded here? data and func only do that when // binding is not local. diff --git a/lld/wasm/OutputSections.h b/lld/wasm/OutputSections.h --- a/lld/wasm/OutputSections.h +++ b/lld/wasm/OutputSections.h @@ -111,7 +111,7 @@ // separately and are instead synthesized by the linker. class CustomSection : public OutputSection { public: - CustomSection(std::string name, ArrayRef inputSections) + CustomSection(std::string name, ArrayRef inputSections) : OutputSection(llvm::wasm::WASM_SEC_CUSTOM, name), inputSections(inputSections) {} @@ -128,8 +128,9 @@ void finalizeContents() override; protected: + void finalizeInputSections(); size_t payloadSize = 0; - ArrayRef inputSections; + std::vector inputSections; std::string nameData; }; diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -12,6 +12,7 @@ #include "OutputSegment.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/Parallel.h" @@ -234,13 +235,46 @@ return false; } +// Lots of duplication here with OutputSegment::finalizeInputSegments +void CustomSection::finalizeInputSections() { + SyntheticMergedChunk *mergedSection = nullptr; + std::vector newSections; + + for (InputChunk *s : inputSections) { + MergeInputChunk *ms = dyn_cast(s); + if (!ms) { + newSections.push_back(s); + continue; + } + + // A section should not make it here unless its alive + // assert(ms->live); + + if (!mergedSection) { + mergedSection = + make(name, 0, WASM_SEG_FLAG_STRINGS); + // syn->outputSec = this; + newSections.push_back(mergedSection); + } + mergedSection->addMergeChunk(ms); + } + + if (!mergedSection) + return; + + mergedSection->finalizeContents(); + inputSections = newSections; +} + void CustomSection::finalizeContents() { + finalizeInputSections(); + raw_string_ostream os(nameData); encodeULEB128(name.size(), os); os << name; os.flush(); - for (InputSection *section : inputSections) { + for (InputChunk *section : inputSections) { assert(!section->discarded); section->outputSec = this; section->outSecOff = payloadSize; @@ -264,19 +298,19 @@ buf += nameData.size(); // Write custom sections payload - for (const InputSection *section : inputSections) + for (const InputChunk *section : inputSections) section->writeTo(buf); } uint32_t CustomSection::getNumRelocations() const { uint32_t count = 0; - for (const InputSection *inputSect : inputSections) + for (const InputChunk *inputSect : inputSections) count += inputSect->getNumRelocations(); return count; } void CustomSection::writeRelocations(raw_ostream &os) const { - for (const InputSection *s : inputSections) + for (const InputChunk *s : inputSections) s->writeRelocations(os); } diff --git a/lld/wasm/OutputSegment.cpp b/lld/wasm/OutputSegment.cpp --- a/lld/wasm/OutputSegment.cpp +++ b/lld/wasm/OutputSegment.cpp @@ -55,16 +55,15 @@ return seg->flags == ms->flags && seg->alignment == ms->alignment; }); if (i == mergedSegments.end()) { - LLVM_DEBUG(llvm::dbgs() << "new merge section: " << name + LLVM_DEBUG(llvm::dbgs() << "new merge segment: " << name << " alignment=" << ms->alignment << "\n"); - SyntheticMergedChunk *syn = - make(name, ms->alignment, ms->flags); + auto *syn = make(name, ms->alignment, ms->flags); syn->outputSeg = this; mergedSegments.push_back(syn); i = std::prev(mergedSegments.end()); newSegments.push_back(syn); } else { - LLVM_DEBUG(llvm::dbgs() << "adding to merge section: " << name << "\n"); + LLVM_DEBUG(llvm::dbgs() << "adding to merge segment: " << name << "\n"); } (*i)->addMergeChunk(ms); } diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -254,14 +254,14 @@ class SectionSymbol : public Symbol { public: - SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr) + SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr) : Symbol("", SectionKind, flags, f), section(s) {} static bool classof(const Symbol *s) { return s->kind() == SectionKind; } const OutputSectionSymbol *getOutputSectionSymbol() const; - const InputSection *section; + const InputChunk *section; }; class DataSymbol : public Symbol { diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -301,7 +301,7 @@ uint64_t DefinedData::getOutputSegmentOffset() const { LLVM_DEBUG(dbgs() << "getOutputSegmentOffset: " << getName() << "\n"); - return segment->getSegmentOffset(value); + return segment->getChunkOffset(value); } uint64_t DefinedData::getOutputSegmentIndex() const { diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -100,7 +100,7 @@ uint64_t fileSize = 0; std::vector initFunctions; - llvm::StringMap> customSectionMapping; + llvm::StringMap> customSectionMapping; // Stable storage for command export wrapper function name strings. std::list commandExportWrapperNames; @@ -121,7 +121,7 @@ log("calculateCustomSections"); bool stripDebug = config->stripDebug || config->stripAll; for (ObjFile *file : symtab->objectFiles) { - for (InputSection *section : file->customSections) { + for (InputChunk *section : file->customSections) { // Exclude COMDAT sections that are not selected for inclusion if (section->discarded) continue; diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -19,6 +19,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" @@ -610,8 +611,9 @@ getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym, unsigned UniqueID = GenericSectionID); - MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) { - return getWasmSection(Section, K, 0, nullptr); + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, + unsigned Flags = 0) { + return getWasmSection(Section, K, Flags, nullptr); } MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -791,9 +791,10 @@ DwarfLineSection = Ctx->getWasmSection(".debug_line", SectionKind::getMetadata()); DwarfLineStrSection = - Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata()); - DwarfStrSection = - Ctx->getWasmSection(".debug_str", SectionKind::getMetadata()); + Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata(), + wasm::WASM_SEG_FLAG_STRINGS); + DwarfStrSection = Ctx->getWasmSection( + ".debug_str", SectionKind::getMetadata(), wasm::WASM_SEG_FLAG_STRINGS); DwarfLocSection = Ctx->getWasmSection(".debug_loc", SectionKind::getMetadata()); DwarfAbbrevSection = @@ -836,7 +837,8 @@ DwarfAbbrevDWOSection = Ctx->getWasmSection(".debug_abbrev.dwo", SectionKind::getMetadata()); DwarfStrDWOSection = - Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata()); + Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata(), + wasm::WASM_SEG_FLAG_STRINGS); DwarfLineDWOSection = Ctx->getWasmSection(".debug_line.dwo", SectionKind::getMetadata()); DwarfLocDWOSection =