diff --git a/lld/test/wasm/gc-sections-metadata-startstop.ll b/lld/test/wasm/gc-sections-metadata-startstop.ll
new file mode 100644
--- /dev/null
+++ b/lld/test/wasm/gc-sections-metadata-startstop.ll
@@ -0,0 +1,73 @@
+; RUN: llc -filetype=obj -o %t.o %s
+; RUN: wasm-ld %t.o --gc-sections -o %t.wasm
+; RUN: llvm-objdump -d --no-show-raw-insn %t.wasm | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+; FOO_MD symbol is not used directly, but is referenced through __start/__stop_foo_md
+@FOO_MD = global [4 x i8] c"bar\00", section "foo_md", align 1
+@__start_foo_md = external constant i8*
+@__stop_foo_md = external constant i8*
+
+define i32 @foo_md_size() {
+entry:
+  ret i32 sub (
+    i32 ptrtoint (i8** @__stop_foo_md to i32),
+    i32 ptrtoint (i8** @__start_foo_md to i32)
+  )
+}
+
+; CHECK: <foo_md_size>:
+; CHECK-EMPTY:
+; CHECK-NEXT: i32.const [[#STOP_ADDR:]]
+; CHECK-NEXT: i32.const [[#STOP_ADDR - 4]]
+; CHECK-NEXT: i32.sub
+
+
+; All segments in concat_section section are marked as live.
+@concat_segment_1 = global [4 x i8] c"xxx\00", section "concat_section", align 1
+@concat_segment_2 = global [4 x i8] c"yyy\00", section "concat_section", align 1
+@__start_concat_section = external constant i8*
+@__stop_concat_section = external constant i8*
+
+define i32 @concat_section_size() {
+entry:
+  ret i32 sub (
+    i32 ptrtoint (i8** @__stop_concat_section to i32),
+    i32 ptrtoint (i8** @__start_concat_section to i32)
+  )
+}
+
+; CHECK: <concat_section_size>:
+; CHECK-EMPTY:
+; CHECK-NEXT: i32.const [[#STOP_ADDR:]]
+; CHECK-NEXT: i32.const [[#STOP_ADDR - 8]]
+; CHECK-NEXT: i32.sub
+
+
+; __start/__stop symbols don't retain invalid C name sections
+@invalid_name_section = global [5 x i8] c"fizz\00", section "invalid.dot.name", align 1
+@__start_invalid.dot.name = extern_weak constant i8*
+@__stop_invalid.dot.name = extern_weak constant i8*
+
+define i32 @invalid_name_section_size() {
+entry:
+  ret i32 sub (
+    i32 ptrtoint (i8** @__stop_invalid.dot.name to i32),
+    i32 ptrtoint (i8** @__start_invalid.dot.name to i32)
+  )
+}
+
+define void @_start() {
+entry:
+  call i32 @foo_md_size()
+  call i32 @concat_section_size()
+  call i32 @invalid_name_section_size()
+  ret void
+}
+
+; CHECK: <invalid_name_section_size>:
+; CHECK-EMPTY:
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.const 0
+; CHECK-NEXT: i32.sub
diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h
--- a/lld/wasm/InputChunks.h
+++ b/lld/wasm/InputChunks.h
@@ -76,6 +76,9 @@
   uint32_t getComdat() const { return comdat; }
   StringRef getComdatName() const;
   uint32_t getInputSectionOffset() const { return inputSectionOffset; }
+  // Returns the name of the output segment this chunk is grouped under
+  // (TLS chunks map to ".tdata"; prefixes are folded when merging segments).
+  StringRef getOutputSegmentName() const;
 
   size_t getNumRelocations() const { return relocations.size(); }
   void writeRelocations(llvm::raw_ostream &os) const;
diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp
--- a/lld/wasm/InputChunks.cpp
+++ b/lld/wasm/InputChunks.cpp
@@ -523,5 +523,24 @@
   return UINT64_C(-1);
 }
 
+StringRef InputChunk::getOutputSegmentName() const {
+  // We always merge .tbss and .tdata into a single TLS segment so all TLS
+  // symbols are relative to a single __tls_base.
+  if (this->isTLS())
+    return ".tdata";
+  StringRef name = this->getName();
+  if (!config->mergeDataSegments)
+    return name;
+  if (name.startswith(".text."))
+    return ".text";
+  if (name.startswith(".data."))
+    return ".data";
+  if (name.startswith(".bss."))
+    return ".bss";
+  if (name.startswith(".rodata."))
+    return ".rodata";
+  return name;
+}
+
 } // namespace wasm
 } // namespace lld
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
--- a/lld/wasm/MarkLive.cpp
+++ b/lld/wasm/MarkLive.cpp
@@ -24,6 +24,7 @@
 #include "InputElement.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
+#include "lld/Common/Strings.h"
 
 #define DEBUG_TYPE "lld"
 
@@ -42,6 +43,7 @@
 private:
   void enqueue(Symbol *sym);
   void enqueueInitFunctions(const ObjFile *sym);
+  void enqueueWholeChunk(InputChunk *chunk);
   void mark();
   bool isCallCtorsLive();
 
@@ -84,6 +86,13 @@
   }
 }
 
+// Marks an entire chunk live and adds it to the work queue.
+void MarkLive::enqueueWholeChunk(InputChunk *chunk) {
+  LLVM_DEBUG(dbgs() << "markLive: " << chunk->getName() << "\n");
+  chunk->live = true;
+  queue.push_back(chunk);
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -97,10 +106,24 @@
   if (WasmSym::callDtors)
     enqueue(WasmSym::callDtors);
 
-  // Enqueue constructors in objects explicitly live from the command-line.
-  for (const ObjFile *obj : symtab->objectFiles)
-    if (obj->isLive())
-      enqueueInitFunctions(obj);
+  for (const ObjFile *obj : symtab->objectFiles) {
+    if (!obj->isLive()) {
+      continue;
+    }
+    // Enqueue constructors in objects explicitly live from the command-line.
+    enqueueInitFunctions(obj);
+
+    // Enqueue data segments referenced through __start/__stop symbols.
+    for (InputChunk *segment : obj->segments) {
+      StringRef name = segment->getOutputSegmentName();
+      if (!isValidCIdentifier(name))
+        continue;
+      if (symtab->find(("__start_" + name).str()) ||
+          symtab->find(("__stop_" + name).str())) {
+        enqueueWholeChunk(segment);
+      }
+    }
+  }
 
   mark();
 
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -854,25 +854,6 @@
   out.tableSec->assignIndexes();
 }
 
-static StringRef getOutputDataSegmentName(const InputChunk &seg) {
-  // We always merge .tbss and .tdata into a single TLS segment so all TLS
-  // symbols are be relative to single __tls_base.
-  if (seg.isTLS())
-    return ".tdata";
-  StringRef name = seg.getName();
-  if (!config->mergeDataSegments)
-    return name;
-  if (name.startswith(".text."))
-    return ".text";
-  if (name.startswith(".data."))
-    return ".data";
-  if (name.startswith(".bss."))
-    return ".bss";
-  if (name.startswith(".rodata."))
-    return ".rodata";
-  return name;
-}
-
 OutputSegment *Writer::createOutputSegment(StringRef name) {
   LLVM_DEBUG(dbgs() << "new segment: " << name << "\n");
   OutputSegment *s = make<OutputSegment>(name);
@@ -889,7 +870,7 @@
     for (InputChunk *segment : file->segments) {
       if (!segment->live)
         continue;
-      StringRef name = getOutputDataSegmentName(*segment);
+      StringRef name = segment->getOutputSegmentName();
       OutputSegment *s = nullptr;
       // When running in relocatable mode we can't merge segments that are part
       // of comdat groups since the ultimate linker needs to be able exclude or