diff --git a/lld/test/wasm/merge-string.s b/lld/test/wasm/merge-string.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/merge-string.s @@ -0,0 +1,65 @@ +// RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o +// RUN: wasm-ld -O2 %t.o -o %t.wasm --no-gc-sections --no-entry +// RUN: obj2yaml %t.wasm | FileCheck %s --check-prefixes=COMMON,MERGE +// RUN: wasm-ld -O0 %t.o -o %t2.wasm --no-gc-sections --no-entry +// RUN: obj2yaml %t2.wasm | FileCheck --check-prefixes=COMMON,NOMERGE %s + + .section .rodata1,"S",@ + .asciz "abc" +foo: + .ascii "a" + .size foo, 1 +bar: + .asciz "bc" + .asciz "bc" + .size bar, 4 + +.globl foo +.globl bar +.export_name foo, foo +.export_name bar, bar + +// COMMON: - Type: GLOBAL +// COMMON-NEXT: Globals: +// COMMON-NEXT: - Index: 0 +// COMMON-NEXT: Type: I32 +// COMMON-NEXT: Mutable: true +// COMMON-NEXT: InitExpr: +// COMMON-NEXT: Opcode: I32_CONST +// COMMON-NEXT: Value: 66576 +// COMMON-NEXT: - Index: 1 +// COMMON-NEXT: Type: I32 +// COMMON-NEXT: Mutable: false +// COMMON-NEXT: InitExpr: +// COMMON-NEXT: Opcode: I32_CONST +// MERGE-NEXT: Value: 1024 +// NOMERGE-NEXT: Value: 1028 +// COMMON-NEXT: - Index: 2 +// COMMON-NEXT: Type: I32 +// COMMON-NEXT: Mutable: false +// COMMON-NEXT: InitExpr: +// COMMON-NEXT: Opcode: I32_CONST +// MERGE-NEXT: Value: 1025 +// NOMERGE-NEXT: Value: 1029 +// COMMON-NEXT: - Type: EXPORT +// COMMON-NEXT: Exports: +// COMMON-NEXT: - Name: memory +// COMMON-NEXT: Kind: MEMORY +// COMMON-NEXT: Index: 0 +// COMMON-NEXT: - Name: foo +// COMMON-NEXT: Kind: GLOBAL +// COMMON-NEXT: Index: 1 +// COMMON-NEXT: - Name: bar +// COMMON-NEXT: Kind: GLOBAL +// COMMON-NEXT: Index: 2 + +// +// COMMON: - Type: DATA +// COMMON-NEXT: Segments: +// COMMON-NEXT: - SectionOffset: 7 +// COMMON-NEXT: InitFlags: 0 +// COMMON-NEXT: Offset: +// COMMON-NEXT: Opcode: I32_CONST +// COMMON-NEXT: Value: 1024 +// MERGE-NEXT: Content: '61626300' +// NOMERGE-NEXT: Content: '6162630061626300626300' diff --git 
a/lld/wasm/CMakeLists.txt b/lld/wasm/CMakeLists.txt --- a/lld/wasm/CMakeLists.txt +++ b/lld/wasm/CMakeLists.txt @@ -10,6 +10,7 @@ MapFile.cpp MarkLive.cpp OutputSections.cpp + OutputSegment.cpp Relocations.cpp SymbolTable.cpp Symbols.cpp diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -385,7 +385,7 @@ LLVM_ENABLE_NEW_PASS_MANAGER); config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->mapFile = args.getLastArgValue(OPT_Map); - config->optimize = args::getInteger(args, OPT_O, 0); + config->optimize = args::getInteger(args, OPT_O, 1); config->outputFile = args.getLastArgValue(OPT_o); config->relocatable = args.hasArg(OPT_relocatable); config->gcSections = @@ -795,6 +795,18 @@ symtab->wrap(w.sym, w.real, w.wrap); } +static void splitSections() { + // splitIntoPieces needs to be called on each MergeInputSection + // before calling finalizeContents(). + LLVM_DEBUG(llvm::dbgs() << "splitSections\n"); + parallelForEach(symtab->objectFiles, [](ObjFile *file) { + for (InputSegment *seg : file->segments) { + if (auto *s = dyn_cast(seg)) + s->splitIntoPieces(); + } + }); +} + void LinkerDriver::linkerMain(ArrayRef argsArr) { WasmOptTable parser; opt::InputArgList args = parser.parse(argsArr.slice(1)); @@ -981,6 +993,10 @@ if (errorCount()) return; + // Split WASM_SEG_FLAG_STRINGS sections into pieces in preparation for garbage + // collection. 
+ splitSections(); + // Do size optimizations: garbage collection markLive(); diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -24,6 +24,8 @@ #include "InputFiles.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/Wasm.h" namespace lld { @@ -35,7 +37,14 @@ class InputChunk { public: - enum Kind { DataSegment, Function, SyntheticFunction, Section }; + enum Kind { + DataSegment, + Merge, + MergedSegment, + Function, + SyntheticFunction, + Section + }; Kind kind() const { return sectionKind; } @@ -43,6 +52,7 @@ virtual uint32_t getInputSize() const { return getSize(); }; virtual void writeTo(uint8_t *buf) const; + void relocate(uint8_t *buf) const; ArrayRef getRelocations() const { return relocations; } void setRelocations(ArrayRef rs) { relocations = rs; } @@ -97,34 +107,147 @@ // each global variable. 
class InputSegment : public InputChunk { public: - InputSegment(const WasmSegment &seg, ObjFile *f) + InputSegment(const WasmSegment *seg, ObjFile *f) : InputChunk(f, InputChunk::DataSegment), segment(seg) { - alignment = segment.Data.Alignment; + alignment = segment->Data.Alignment; + flags = segment->Data.LinkingFlags; } - static bool classof(const InputChunk *c) { return c->kind() == DataSegment; } + InputSegment(uint32_t alignment, uint32_t flags) + : InputChunk(nullptr, InputChunk::DataSegment), alignment(alignment), + flags(flags) {} + + static bool classof(const InputChunk *c) { + return c->kind() == DataSegment || c->kind() == Merge || + c->kind() == MergedSegment; + } void generateRelocationCode(raw_ostream &os) const; - StringRef getName() const override { return segment.Data.Name; } + StringRef getName() const override { return segment->Data.Name; } StringRef getDebugName() const override { return StringRef(); } - uint32_t getComdat() const override { return segment.Data.Comdat; } + uint32_t getComdat() const override { return segment->Data.Comdat; } uint32_t getInputSectionOffset() const override { - return segment.SectionOffset; + return segment->SectionOffset; } + + // Translate an offset in the input section to an offset in the output + // section. 
+ uint64_t getOffset(uint64_t offset) const; + uint64_t getVA(uint64_t offset = 0) const; - const OutputSegment *outputSeg = nullptr; - uint32_t outputSegmentOffset = 0; - uint32_t alignment = 0; bool isTLS() { return getName().startswith(".tdata") || getName().startswith(".tbss"); } + const OutputSegment *outputSeg = nullptr; + uint32_t outputSegmentOffset = 0; + uint32_t alignment = 0; + uint32_t flags = 0; + protected: - ArrayRef data() const override { return segment.Data.Content; } + ArrayRef data() const override { return segment->Data.Content; } - const WasmSegment &segment; + const WasmSegment *segment = nullptr; +}; + +class SyntheticMergedDataSegment; + +// Merge segment handling copied from lld/ELF/InputSection.h. Keep in sync +// where possible. + +// SegmentPiece represents a piece of splittable segment contents. +// We allocate a lot of these and binary search on them. This means that they +// have to be as compact as possible, which is why we don't store the size (can +// be found by looking at the next one). +struct SegmentPiece { + SegmentPiece(size_t off, uint32_t hash, bool live) + : inputOff(off), live(live || !config->gcSections), hash(hash >> 1) {} + + uint32_t inputOff; + uint32_t live : 1; + uint32_t hash : 31; + uint64_t outputOff = 0; +}; + +static_assert(sizeof(SegmentPiece) == 16, "SectionPiece is too big"); + +// This corresponds to segments marked as WASM_SEG_FLAG_STRINGS. +class MergeInputSegment : public InputSegment { +public: + MergeInputSegment(const WasmSegment *seg, ObjFile *f) : InputSegment(seg, f) { + sectionKind = Merge; + } + + static bool classof(const InputChunk *s) { return s->kind() == Merge; } + void splitIntoPieces(); + + // Translate an offset in the input section to an offset in the parent + // SyntheticMergedDataSegment. + uint64_t getParentOffset(uint64_t offset) const; + + // Splittable sections are handled as a sequence of data + // rather than a single large blob of data.
+ std::vector pieces; + + // Returns I'th piece's data. This function is very hot when + // string merging is enabled, so we want to inline. + LLVM_ATTRIBUTE_ALWAYS_INLINE + llvm::CachedHashStringRef getData(size_t i) const { + size_t begin = pieces[i].inputOff; + size_t end = + (pieces.size() - 1 == i) ? data().size() : pieces[i + 1].inputOff; + return {toStringRef(data().slice(begin, end - begin)), pieces[i].hash}; + } + + // Returns the SegmentPiece at a given input section offset. + SegmentPiece *getSegmentPiece(uint64_t offset); + const SegmentPiece *getSegmentPiece(uint64_t offset) const { + return const_cast(this)->getSegmentPiece(offset); + } + + SyntheticMergedDataSegment *parent = nullptr; + +private: + void splitStrings(ArrayRef a); +}; + +// SyntheticMergedDataSegment is a class that allows us to put mergeable +// sections with different attributes in a single output section. To do that we +// put them into SyntheticMergedDataSegment synthetic input sections which are +// attached to regular output sections. +class SyntheticMergedDataSegment : public InputSegment { +public: + SyntheticMergedDataSegment(StringRef name, uint32_t alignment, uint32_t flags) + : InputSegment(alignment, flags), name(name), + builder(llvm::StringTableBuilder::RAW, 1ULL << alignment) { + sectionKind = InputChunk::MergedSegment; + } + + static bool classof(const InputChunk *c) { + return c->kind() == InputChunk::MergedSegment; + } + + uint32_t getSize() const override; + + StringRef getName() const override { return name; } + + uint32_t getComdat() const override { return segments[0]->getComdat(); } + + void writeTo(uint8_t *buf) const override; + + void addMergeSegment(MergeInputSegment *ms) { + ms->parent = this; + segments.push_back(ms); + } + + void finalizeContents(); + +protected: + std::vector segments; + StringRef name; + llvm::StringTableBuilder builder; }; // Represents a single wasm function within and input file.
These are diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -13,6 +13,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/LLVM.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/xxhash.h" #define DEBUG_TYPE "lld" @@ -126,6 +127,10 @@ memcpy(buf + outSecOff, data().data(), data().size()); // Apply relocations + relocate(buf + outSecOff); +} + +void InputChunk::relocate(uint8_t *buf) const { if (relocations.empty()) return; @@ -135,11 +140,11 @@ LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this) << " count=" << relocations.size() << "\n"); - int32_t off = outSecOff - getInputSectionOffset(); + int32_t inputSectionOffset = getInputSectionOffset(); auto tombstone = getTombstone(); for (const WasmRelocation &rel : relocations) { - uint8_t *loc = buf + rel.Offset + off; + uint8_t *loc = buf + rel.Offset - inputSectionOffset; auto value = file->calcNewValue(rel, tombstone, this); LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type)); if (rel.Type != R_WASM_TYPE_INDEX_LEB) @@ -357,8 +362,20 @@ LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n"); } +uint64_t InputSegment::getOffset(uint64_t offset) const { + if (const MergeInputSegment *ms = dyn_cast(this)) { + LLVM_DEBUG(dbgs() << "getOffset(merged): " << getName() << "\n"); + LLVM_DEBUG(dbgs() << "offset: " << offset << "\n"); + LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset) + << "\n"); + assert(ms->parent); + return ms->parent->getOffset(ms->getParentOffset(offset)); + } + return outputSegmentOffset + offset; +} + uint64_t InputSegment::getVA(uint64_t offset) const { - return outputSeg->startVA + outputSegmentOffset + offset; + return (outputSeg ? outputSeg->startVA : 0) + getOffset(offset); } // Generate code to apply relocations to the data section at runtime. @@ -431,6 +448,93 @@ } } +// Split WASM_SEG_FLAG_STRINGS section. 
Such a section is a sequence of +// null-terminated strings. +void MergeInputSegment::splitStrings(ArrayRef data) { + LLVM_DEBUG(llvm::dbgs() << "splitStrings\n"); + size_t off = 0; + StringRef s = toStringRef(data); + + while (!s.empty()) { + size_t end = s.find(0); + if (end == StringRef::npos) + fatal(toString(this) + ": string is not null terminated"); + size_t size = end + 1; + + pieces.emplace_back(off, xxHash64(s.substr(0, size)), true); + s = s.substr(size); + off += size; + } +} + +// This function is called after we obtain a complete list of input sections +// that need to be linked. This is responsible to split section contents +// into small chunks for further processing. +// +// Note that this function is called from parallelForEach. This must be +// thread-safe (i.e. no memory allocation from the pools). +void MergeInputSegment::splitIntoPieces() { + assert(pieces.empty()); + // As of now we only support WASM_SEG_FLAG_STRINGS but in the future we + // could add other types of splitting (see ELF's splitIntoPieces). + assert(segment->Data.LinkingFlags & WASM_SEG_FLAG_STRINGS); + splitStrings(data()); +} + +SegmentPiece *MergeInputSegment::getSegmentPiece(uint64_t offset) { + if (this->data().size() <= offset) + fatal(toString(this) + ": offset is outside the section"); + + // If Offset is not at beginning of a section piece, it is not in the map. + // In that case we need to do a binary search of the original section piece + // vector. + auto it = partition_point( + pieces, [=](SegmentPiece p) { return p.inputOff <= offset; }); + return &it[-1]; +} + +// Returns the offset in an output section for a given input offset. +// Because contents of a mergeable section is not contiguous in output, +// it is not just an addition to a base output offset. +uint64_t MergeInputSegment::getParentOffset(uint64_t offset) const { + // If Offset is not at beginning of a section piece, it is not in the map. 
+ // In that case we need to search from the original section piece vector. + const SegmentPiece *piece = getSegmentPiece(offset); + uint64_t addend = offset - piece->inputOff; + return piece->outputOff + addend; +} + +uint32_t SyntheticMergedDataSegment::getSize() const { + return builder.getSize(); +} + +void SyntheticMergedDataSegment::writeTo(uint8_t *buf) const { + builder.write(buf + outSecOff); + + // Apply relocations + relocate(buf + outSecOff); +} + +void SyntheticMergedDataSegment::finalizeContents() { + // Add all string pieces to the string table builder to create section + // contents. + for (MergeInputSegment *sec : segments) + for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) + if (sec->pieces[i].live) + builder.add(sec->getData(i)); + + // Fix the string table content. After this, the contents will never change. + builder.finalize(); + + // finalize() fixed tail-optimized strings, so we can now get + // offsets of strings. Get an offset for each string and save it + // to a corresponding SegmentPiece for easy access. + for (MergeInputSegment *sec : segments) + for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) + if (sec->pieces[i].live) + sec->pieces[i].outputOff = builder.getOffset(sec->getData(i)); +} + uint64_t InputSection::getTombstoneForSection(StringRef name) { // When a function is not live we need to update relocations referring to it. // If they occur in DWARF debug symbols, we want to change the pc of the diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -425,6 +425,29 @@ config->legacyFunctionTable = true; } +static bool shouldMerge(const WasmSegment &seg) { + // As of now we only support merging strings, and only with single byte + // alignment (2^0). + if (!(seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS) || + (seg.Data.Alignment != 0)) + return false; + + // On a regular link we don't merge sections if -O0 (default is -O1).
This + // sometimes makes the linker significantly faster, although the output will + // be bigger. + if (config->optimize == 0) + return false; + + // A mergeable section with size 0 is useless because they don't have + // any data to merge. A mergeable string section with size 0 can be + // argued as invalid because it doesn't end with a null character. + // We'll avoid a mess by handling them as if they were non-mergeable. + if (seg.Data.Content.size() == 0) + return false; + + return true; +} + void ObjFile::parse(bool ignoreComdats) { // Parse a memory buffer as a wasm file. LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n"); @@ -506,8 +529,13 @@ // Populate `Segments`. for (const WasmSegment &s : wasmObj->dataSegments()) { - auto* seg = make(s, this); + InputSegment *seg; + if (shouldMerge(s)) { + seg = make(&s, this); + } else + seg = make(&s, this); seg->discarded = isExcludedByComdat(seg); + segments.emplace_back(seg); } setRelocs(segments, dataSection); diff --git a/lld/wasm/OutputSegment.h b/lld/wasm/OutputSegment.h --- a/lld/wasm/OutputSegment.h +++ b/lld/wasm/OutputSegment.h @@ -22,21 +22,15 @@ public: OutputSegment(StringRef n) : name(n) {} - void addInputSegment(InputSegment *inSeg) { - uint32_t segAlign = inSeg->alignment; - alignment = std::max(alignment, segAlign); - inputSegments.push_back(inSeg); - size = llvm::alignTo(size, 1ULL << segAlign); - inSeg->outputSeg = this; - inSeg->outputSegmentOffset = size; - size += inSeg->getSize(); - } + void addInputSegment(InputSegment *inSeg); + void finalizeInputSegments(); bool isTLS() const { return name == ".tdata"; } StringRef name; bool isBss = false; uint32_t index = 0; + uint32_t linkingFlags = 0; uint32_t initFlags = 0; uint32_t sectionOffset = 0; uint32_t alignment = 0; diff --git a/lld/wasm/OutputSegment.cpp b/lld/wasm/OutputSegment.cpp new file mode 100644 --- /dev/null +++ b/lld/wasm/OutputSegment.cpp @@ -0,0 +1,88 @@ +//===- OutputSegment.h 
-----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSegment.h" +#include "InputChunks.h" +#include "lld/Common/Memory.h" + +#define DEBUG_TYPE "lld" + +using namespace llvm; +using namespace llvm::wasm; + +namespace lld { + +namespace wasm { + +void OutputSegment::addInputSegment(InputSegment *inSeg) { + alignment = std::max(alignment, inSeg->alignment); + inputSegments.push_back(inSeg); + size = llvm::alignTo(size, 1ULL << inSeg->alignment); + LLVM_DEBUG(dbgs() << "addInputSegment: " << inSeg->getName() + << " oname=" << name << " size=" << inSeg->getSize() + << " align=" << inSeg->alignment << " at:" << size << "\n"); + inSeg->outputSeg = this; + inSeg->outputSegmentOffset = size; + size += inSeg->getSize(); +} + +// This function scans over the input segments. +// +// It removes MergeInputSegments from the input section array and adds +// new synthetic sections at the location of the first input section +// that it replaces. It then finalizes each synthetic section in order +// to compute an output offset for each piece of each input section. 
+void OutputSegment::finalizeInputSegments() { + LLVM_DEBUG(llvm::dbgs() << "finalizeInputSegments: " << name << "\n"); + std::vector mergedSegments; + std::vector newSegments; + for (InputSegment *s : inputSegments) { + MergeInputSegment *ms = dyn_cast(s); + if (!ms) { + newSegments.push_back(s); + continue; + } + + // A segment should not make it here unless it's alive + assert(ms->live); + + auto i = + llvm::find_if(mergedSegments, [=](SyntheticMergedDataSegment *seg) { + return seg->flags == ms->flags && seg->alignment == ms->alignment; + }); + if (i == mergedSegments.end()) { + LLVM_DEBUG(llvm::dbgs() << "new merge section: " << name + << " alignment=" << ms->alignment << "\n"); + SyntheticMergedDataSegment *syn = + make(name, ms->alignment, ms->flags); + syn->outputSeg = this; + mergedSegments.push_back(syn); + i = std::prev(mergedSegments.end()); + newSegments.push_back(syn); + } else { + LLVM_DEBUG(llvm::dbgs() << "adding to merge section: " << name << "\n"); + } + (*i)->addMergeSegment(ms); + } + + for (auto *ms : mergedSegments) + ms->finalizeContents(); + + inputSegments = newSegments; + size = 0; + for (InputSegment *seg : inputSegments) { + size = llvm::alignTo(size, 1ULL << seg->alignment); + LLVM_DEBUG(llvm::dbgs() << "outputSegmentOffset set: " << seg->getName() + << " -> " << size << "\n"); + seg->outputSegmentOffset = size; + size += seg->getSize(); + } +} + +} // namespace wasm +} // namespace lld diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -146,6 +146,7 @@ void Symbol::markLive() { assert(!isDiscarded()); + referenced = true; if (file != NULL && isDefined()) file->markLive(); if (auto *g = dyn_cast(this)) @@ -154,9 +155,17 @@ e->event->live = true; if (auto *t = dyn_cast(this)) t->table->live = true; - if (InputChunk *c = getChunk()) + if (InputChunk *c = getChunk()) { + // Usually, a whole chunk is marked as live or dead, but in mergeable + // (splittable) sections, each piece
of data has independent liveness bit. + // So we explicitly tell it which offset is in use. + if (auto *d = dyn_cast(this)) { + if (auto *ms = dyn_cast(c)) { + ms->getSegmentPiece(d->value)->live = true; + } + } c->live = true; - referenced = true; + } } uint32_t Symbol::getOutputSymbolIndex() const { diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -536,7 +536,7 @@ for (const OutputSegment *s : dataSegments) { writeStr(sub.os, s->name, "segment name"); writeUleb128(sub.os, s->alignment, "alignment"); - writeUleb128(sub.os, 0, "flags"); + writeUleb128(sub.os, s->linkingFlags, "flags"); } sub.writeTo(os); } diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -870,7 +870,6 @@ s = segmentMap[name]; } s->addInputSegment(segment); - LLVM_DEBUG(dbgs() << "added data: " << name << ": " << s->size << "\n"); } } @@ -890,6 +889,11 @@ for (size_t i = 0; i < segments.size(); ++i) segments[i]->index = i; + + // Merge MergeInputSections into a single MergeSyntheticSection. + LLVM_DEBUG(dbgs() << "-- finalize input semgments\n"); + for (OutputSegment *seg : segments) + seg->finalizeInputSegments(); } void Writer::combineOutputSegments() { @@ -910,6 +914,7 @@ new_segments.push_back(s); } else { if (!combined) { + LLVM_DEBUG(dbgs() << "created combined output segment: .data\n"); combined = make(".data"); combined->startVA = s->startVA; if (config->sharedMemory) @@ -926,6 +931,8 @@ combined->addInputSegment(inSeg); #ifndef NDEBUG uint64_t newVA = inSeg->getVA(); + LLVM_DEBUG(dbgs() << "added input segment. 
name=" << inSeg->getName() + << " oldVA=" << oldVA << " newVA=" << newVA << "\n"); assert(oldVA == newVA); #endif } diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -154,7 +154,7 @@ ArrayRef Content; StringRef Name; // from the "segment info" section uint32_t Alignment; - uint32_t LinkerFlags; + uint32_t LinkingFlags; uint32_t Comdat; // from the "comdat info" section }; @@ -357,6 +357,10 @@ WASM_SYMBOL_TYPE_TABLE = 0x5, }; +enum WasmSegmentFlag : unsigned { + WASM_SEG_FLAG_STRINGS = 0x1, +}; + // Kinds of event attributes. enum WasmEventAttribute : unsigned { WASM_EVENT_ATTRIBUTE_EXCEPTION = 0x0, diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -611,26 +611,27 @@ unsigned UniqueID = GenericSectionID); MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) { - return getWasmSection(Section, K, nullptr); + return getWasmSection(Section, K, 0, nullptr); } MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - const char *BeginSymName) { - return getWasmSection(Section, K, "", ~0, BeginSymName); + unsigned Flags, const char *BeginSymName) { + return getWasmSection(Section, K, Flags, "", ~0, BeginSymName); } MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - const Twine &Group, unsigned UniqueID) { - return getWasmSection(Section, K, Group, UniqueID, nullptr); + unsigned Flags, const Twine &Group, + unsigned UniqueID) { + return getWasmSection(Section, K, Flags, Group, UniqueID, nullptr); } MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - const Twine &Group, unsigned UniqueID, - const char *BeginSymName); + unsigned Flags, const Twine &Group, + unsigned UniqueID, const char *BeginSymName); MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, - const 
MCSymbolWasm *Group, unsigned UniqueID, - const char *BeginSymName); + unsigned Flags, const MCSymbolWasm *Group, + unsigned UniqueID, const char *BeginSymName); MCSectionXCOFF *getXCOFFSection( StringRef Section, SectionKind K, diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h --- a/llvm/include/llvm/MC/MCSectionWasm.h +++ b/llvm/include/llvm/MC/MCSectionWasm.h @@ -37,14 +37,18 @@ // segment uint32_t SegmentIndex = 0; - // Whether this data segment is passive + // For data sections, whether to use a passive segment bool IsPassive = false; + // For data sections, bitfield of WasmSegmentFlag + unsigned SegmentFlags; + // The storage of Name is owned by MCContext's WasmUniquingMap. friend class MCContext; - MCSectionWasm(StringRef Name, SectionKind K, const MCSymbolWasm *group, - unsigned UniqueID, MCSymbol *Begin) - : MCSection(SV_Wasm, Name, K, Begin), UniqueID(UniqueID), Group(group) {} + MCSectionWasm(StringRef Name, SectionKind K, unsigned SegmentFlags, + const MCSymbolWasm *Group, unsigned UniqueID, MCSymbol *Begin) + : MCSection(SV_Wasm, Name, K, Begin), UniqueID(UniqueID), Group(Group), + SegmentFlags(SegmentFlags) {} public: /// Decides whether a '.section' directive should be printed before the @@ -52,6 +56,7 @@ bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; const MCSymbolWasm *getGroup() const { return Group; } + unsigned getSegmentFlags() const { return SegmentFlags; } void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, raw_ostream &OS, diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -21,6 +21,7 @@ #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Wasm.h" #include "llvm/CodeGen/BasicBlockSectionUtils.h" 
#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" @@ -2005,6 +2006,17 @@ return C; } +static unsigned getWasmSectionFlags(SectionKind K) { + unsigned Flags = 0; + + // TODO(sbc): Add support for K.isMergeableConst() + + if (K.isMergeableCString()) + Flags |= wasm::WASM_SEG_FLAG_STRINGS; + + return Flags; +} + MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { // We don't support explict section names for functions in the wasm object @@ -2028,9 +2040,9 @@ Group = C->getName(); } - MCSectionWasm* Section = - getContext().getWasmSection(Name, Kind, Group, - MCContext::GenericSectionID); + unsigned Flags = getWasmSectionFlags(Kind); + MCSectionWasm *Section = getContext().getWasmSection( + Name, Kind, Flags, Group, MCContext::GenericSectionID); return Section; } @@ -2062,7 +2074,8 @@ (*NextUniqueID)++; } - return Ctx.getWasmSection(Name, Kind, Group, UniqueID); + unsigned Flags = getWasmSectionFlags(Kind); + return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID); } MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -672,7 +672,8 @@ } MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind K, - const Twine &Group, unsigned UniqueID, + unsigned Flags, const Twine &Group, + unsigned UniqueID, const char *BeginSymName) { MCSymbolWasm *GroupSym = nullptr; if (!Group.isTriviallyEmpty() && !Group.str().empty()) { @@ -680,10 +681,11 @@ GroupSym->setComdat(true); } - return getWasmSection(Section, K, GroupSym, UniqueID, BeginSymName); + return getWasmSection(Section, K, Flags, GroupSym, UniqueID, BeginSymName); } MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, + unsigned Flags, const MCSymbolWasm *GroupSym, unsigned UniqueID, const char *BeginSymName)
{ @@ -704,7 +706,7 @@ cast(Begin)->setType(wasm::WASM_SYMBOL_TYPE_SECTION); MCSectionWasm *Result = new (WasmAllocator.Allocate()) - MCSectionWasm(CachedName, Kind, GroupSym, UniqueID, Begin); + MCSectionWasm(CachedName, Kind, Flags, GroupSym, UniqueID, Begin); Entry.second = Result; auto *F = new MCDataFragment(); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -855,9 +855,9 @@ // DWP Sections DwarfCUIndexSection = - Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata(), 0); + Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata()); DwarfTUIndexSection = - Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata(), 0); + Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata()); // Wasm use data section for LSDA. // TODO Consider putting each function's exception table in a separate @@ -1008,8 +1008,8 @@ return Ctx->getELFSection(Name, ELF::SHT_PROGBITS, ELF::SHF_GROUP, 0, utostr(Hash), /*IsComdat=*/true); case Triple::Wasm: - return Ctx->getWasmSection(Name, SectionKind::getMetadata(), utostr(Hash), - MCContext::GenericSectionID); + return Ctx->getWasmSection(Name, SectionKind::getMetadata(), 0, + utostr(Hash), MCContext::GenericSectionID); case Triple::MachO: case Triple::COFF: case Triple::GOFF: diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp --- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp +++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp @@ -90,7 +90,8 @@ return false; } - bool parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) { + uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) { + uint32_t flags = 0; for (char C : FlagStr) { switch (C) { case 'p': @@ -99,12 +100,14 @@ case 'G': Group = true; break; + case 'S': + flags |= wasm::WASM_SEG_FLAG_STRINGS; + break; default: - return Parser->Error(getTok().getLoc(), - StringRef("Unexepcted 
section flag: ") + FlagStr); + return -1U; } } - return false; + return flags; } bool parseGroup(StringRef &GroupName) { @@ -128,7 +131,7 @@ return false; } - bool parseSectionDirective(StringRef, SMLoc) { + bool parseSectionDirective(StringRef, SMLoc loc) { StringRef Name; if (Parser->parseIdentifier(Name)) return TokError("expected identifier in directive"); @@ -156,8 +159,10 @@ // Update section flags if present in this .section directive bool Passive = false; bool Group = false; - if (parseSectionFlags(getTok().getStringContents(), Passive, Group)) - return true; + uint32_t Flags = + parseSectionFlags(getTok().getStringContents(), Passive, Group); + if (Flags == -1U) + return TokError("unknown flag"); Lex(); @@ -173,13 +178,19 @@ // TODO: Parse UniqueID MCSectionWasm *WS = getContext().getWasmSection( - Name, Kind.getValue(), GroupName, MCContext::GenericSectionID); + Name, Kind.getValue(), Flags, GroupName, MCContext::GenericSectionID); + + if (WS->getSegmentFlags() != Flags) + Parser->Error(loc, "changed section flags for " + Name + + ", expected: 0x" + + utohexstr(WS->getSegmentFlags())); + if (Passive) { if (!WS->isWasmData()) - return Parser->Error(getTok().getLoc(), - "Only data sections can be passive"); + return Parser->Error(loc, "Only data sections can be passive"); WS->setPassive(); } + getStreamer().SwitchSection(WS); return false; } diff --git a/llvm/lib/MC/MCSectionWasm.cpp b/llvm/lib/MC/MCSectionWasm.cpp --- a/llvm/lib/MC/MCSectionWasm.cpp +++ b/llvm/lib/MC/MCSectionWasm.cpp @@ -64,9 +64,11 @@ OS << ",\""; if (IsPassive) - OS << "p"; + OS << 'p'; if (Group) - OS << "G"; + OS << 'G'; + if (SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS) + OS << 'S'; OS << '"'; diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -67,7 +67,7 @@ uint32_t InitFlags; uint64_t Offset; uint32_t Alignment; - uint32_t LinkerFlags; + uint32_t LinkingFlags; SmallVector 
Data; }; @@ -1133,7 +1133,7 @@ for (const WasmDataSegment &Segment : DataSegments) { writeString(Segment.Name); encodeULEB128(Segment.Alignment, W->OS); - encodeULEB128(Segment.LinkerFlags, W->OS); + encodeULEB128(Segment.LinkingFlags, W->OS); } endSection(SubSection); } @@ -1440,7 +1440,7 @@ Segment.Section = &Section; addData(Segment.Data, Section); Segment.Alignment = Log2_32(Section.getAlignment()); - Segment.LinkerFlags = 0; + Segment.LinkingFlags = Section.getSegmentFlags(); DataSize += Segment.Data.size(); Section.setSegmentIndex(SegmentIndex); diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -462,7 +462,7 @@ for (uint32_t I = 0; I < Count; I++) { DataSegments[I].Data.Name = readString(Ctx); DataSegments[I].Data.Alignment = readVaruint32(Ctx); - DataSegments[I].Data.LinkerFlags = readVaruint32(Ctx); + DataSegments[I].Data.LinkingFlags = readVaruint32(Ctx); } break; } @@ -1431,7 +1431,7 @@ // The rest of these Data fields are set later, when reading in the linking // metadata section. 
Segment.Data.Alignment = 0; - Segment.Data.LinkerFlags = 0; + Segment.Data.LinkingFlags = 0; Segment.Data.Comdat = UINT32_MAX; Segment.SectionOffset = Ctx.Ptr - Ctx.Start; Ctx.Ptr += Size; diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp --- a/llvm/lib/ObjectYAML/WasmYAML.cpp +++ b/llvm/lib/ObjectYAML/WasmYAML.cpp @@ -541,7 +541,11 @@ } void ScalarBitSetTraits::bitset( - IO &IO, WasmYAML::SegmentFlags &Value) {} + IO &IO, WasmYAML::SegmentFlags &Value) { +#define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_SEG_FLAG_##X) + BCase(STRINGS); +#undef BCase +} void ScalarBitSetTraits::bitset( IO &IO, WasmYAML::SymbolFlags &Value) { diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -1070,7 +1070,7 @@ if (Group) WasmSym->setComdat(true); auto *WS = - getContext().getWasmSection(SecName, SectionKind::getText(), Group, + getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group, MCContext::GenericSectionID, nullptr); getStreamer().SwitchSection(WS); // Also generate DWARF for this section if requested. 
diff --git a/llvm/test/MC/WebAssembly/section-flags-changed.s b/llvm/test/MC/WebAssembly/section-flags-changed.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/WebAssembly/section-flags-changed.s @@ -0,0 +1,7 @@ +# RUN: not llvm-mc -triple=wasm32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=error: + +foo: +.section .foo,"S",@ + +# CHECK: {{.*}}.s:[[# @LINE+1]]:1: error: changed section flags for .foo, expected: 0x1 +.section .foo,"",@ diff --git a/llvm/tools/obj2yaml/wasm2yaml.cpp b/llvm/tools/obj2yaml/wasm2yaml.cpp --- a/llvm/tools/obj2yaml/wasm2yaml.cpp +++ b/llvm/tools/obj2yaml/wasm2yaml.cpp @@ -100,7 +100,7 @@ SegmentInfo.Name = Segment.Data.Name; SegmentInfo.Index = SegmentIndex; SegmentInfo.Alignment = Segment.Data.Alignment; - SegmentInfo.Flags = Segment.Data.LinkerFlags; + SegmentInfo.Flags = Segment.Data.LinkingFlags; LinkingSec->SegmentInfos.push_back(SegmentInfo); } if (Segment.Data.Comdat != UINT32_MAX) {