diff --git a/lld/test/wasm/Inputs/ctor-ctor.s b/lld/test/wasm/Inputs/ctor-ctor.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/ctor-ctor.s @@ -0,0 +1,15 @@ + .section .text.def,"",@ + .globl def +def: + .functype def () -> () + end_function + + .section .text.test_ctor,"",@ + .globl test_ctor +test_ctor: + .functype test_ctor () -> () + end_function + + .section .init_array,"",@ + .p2align 2 + .int32 test_ctor diff --git a/lld/test/wasm/Inputs/ctor-lib.s b/lld/test/wasm/Inputs/ctor-lib.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/ctor-lib.s @@ -0,0 +1,14 @@ + .section .text.lib_func,"",@ + .globl lib_func +lib_func: + .functype lib_func () -> () + end_function + + .section .text.unused_lib_func,"",@ + .globl unused_lib_func +unused_lib_func: + .functype unused_lib_func () -> () + call def + end_function + + .functype def () -> () diff --git a/lld/test/wasm/Inputs/ctor-setup-call-def.s b/lld/test/wasm/Inputs/ctor-setup-call-def.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/ctor-setup-call-def.s @@ -0,0 +1,21 @@ +# Like Inputs/ctor-setup.s, except it calls `def` instead of `lib_func`, +# so it pulls in the .o file containing `test_ctor`. + + .section .text._start,"",@ + .globl _start +_start: + .functype _start () -> () + end_function + + .section .text.setup,"",@ + .globl setup +setup: + .functype setup () -> () + call def + end_function + + .section .init_array,"",@ + .p2align 2 + .int32 setup + + .functype def () -> () diff --git a/lld/test/wasm/Inputs/ctor-setup.s b/lld/test/wasm/Inputs/ctor-setup.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/ctor-setup.s @@ -0,0 +1,21 @@ +# Like Inputs/ctor-start.s, except it calls `lib_func` from a ctor +# instead of from `_start`.
+ + .section .text._start,"",@ + .globl _start +_start: + .functype _start () -> () + end_function + + .section .text.setup,"",@ + .globl setup +setup: + .functype setup () -> () + call lib_func + end_function + + .section .init_array,"",@ + .p2align 2 + .int32 setup + + .functype lib_func () -> () diff --git a/lld/test/wasm/Inputs/ctor-start.s b/lld/test/wasm/Inputs/ctor-start.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/Inputs/ctor-start.s @@ -0,0 +1,7 @@ + .globl _start +_start: + .functype _start () -> () + call lib_func + end_function + + .functype lib_func () -> () diff --git a/lld/test/wasm/ctor-gc-setup.test b/lld/test/wasm/ctor-gc-setup.test new file mode 100644 --- /dev/null +++ b/lld/test/wasm/ctor-gc-setup.test @@ -0,0 +1,12 @@ +; Like ctor-gc.test, but main object calls a function from its constructor, +; which shouldn't matter; `test_ctor` shouldn't be pulled in. +; +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup.s -o %t.setup.o +; RUN: rm -f %t.lib.a +; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o +; RUN: wasm-ld %t.setup.o %t.lib.a -o %t.wasm +; RUN: obj2yaml %t.wasm | FileCheck %s + +; CHECK-NOT: Name: test_ctor diff --git a/lld/test/wasm/ctor-gc.test b/lld/test/wasm/ctor-gc.test new file mode 100644 --- /dev/null +++ b/lld/test/wasm/ctor-gc.test @@ -0,0 +1,12 @@ +; Verify that constructors from an archive member that is pulled in but ends +; up contributing nothing live to the final link are not included.
+; +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-start.s -o %t.start.o +; RUN: rm -f %t.lib.a +; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o +; RUN: wasm-ld %t.start.o %t.lib.a -o %t.wasm +; RUN: obj2yaml %t.wasm | FileCheck %s + +; CHECK-NOT: __wasm_call_ctors diff --git a/lld/test/wasm/ctor-no-gc.test b/lld/test/wasm/ctor-no-gc.test new file mode 100644 --- /dev/null +++ b/lld/test/wasm/ctor-no-gc.test @@ -0,0 +1,12 @@ +; Like ctor-gc-setup.test, but it calls a different function, so it does pull +; in the object containing `test_ctor`, so `test_ctor` is linked in. +; +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o +; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup-call-def.s -o %t.setup-call-def.o +; RUN: rm -f %t.lib.a +; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o +; RUN: wasm-ld %t.setup-call-def.o %t.lib.a -o %t.wasm +; RUN: obj2yaml %t.wasm | FileCheck %s + +; CHECK: Name: test_ctor diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h --- a/lld/wasm/InputFiles.h +++ b/lld/wasm/InputFiles.h @@ -60,8 +60,14 @@ MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; } + // An InputFile is live if GC is off, if it is an object/bitcode file not + // taken from an archive, or once any of its symbols is marked live. + void markLive() { live = true; } + bool isLive() const { return live; } + protected: - InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {} + InputFile(Kind k, MemoryBufferRef m) + : mb(m), fileKind(k), live(!config->gcSections) {} MemoryBufferRef mb; // List of all symbols referenced or defined by this file.
@@ -69,6 +75,7 @@ private: const Kind fileKind; + bool live; }; // .a file (ar archive) @@ -92,6 +99,10 @@ explicit ObjFile(MemoryBufferRef m, StringRef archiveName) : InputFile(ObjectKind, m) { this->archiveName = std::string(archiveName); + + // If this isn't part of an archive, it's eagerly linked, so mark it live. + if (archiveName.empty()) + markLive(); } static bool classof(const InputFile *f) { return f->kind() == ObjectKind; } @@ -156,6 +167,10 @@ explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName) : InputFile(BitcodeKind, m) { this->archiveName = std::string(archiveName); + + // If this isn't part of an archive, it's eagerly linked, so mark it live. + if (archiveName.empty()) + markLive(); } static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp --- a/lld/wasm/MarkLive.cpp +++ b/lld/wasm/MarkLive.cpp @@ -42,6 +42,7 @@ private: void enqueue(Symbol *sym); + void enqueueInitFunctions(const ObjFile *obj); void markSymbol(Symbol *sym); void mark(); bool isCallCtorsLive(); @@ -56,11 +57,35 @@ if (!sym || sym->isLive()) return; LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n"); + + InputFile *file = sym->getFile(); + bool needInitFunctions = file && !file->isLive() && sym->isDefined(); + sym->markLive(); + + // This defined symbol is the first thing to make its file live (that is + // why the check above precedes markLive(), which also marks the file + // live), so enqueue the file's ctor functions too; enqueueInitFunctions + // explains why that has to be done by hand. + if (needInitFunctions) + enqueueInitFunctions(cast<ObjFile>(file)); + if (InputChunk *chunk = sym->getChunk()) queue.push_back(chunk); } +// The ctor functions are all referenced by the synthetic callCtors +// function. However, this function does not contain relocations so we +// have to manually mark the ctors as live.
+void MarkLive::enqueueInitFunctions(const ObjFile *obj) { + const WasmLinkingData &l = obj->getWasmObj()->linkingData(); + for (const WasmInitFunc &f : l.InitFunctions) { + auto *initSym = obj->getFunctionSymbol(f.Symbol); + if (!initSym->isDiscarded()) + enqueue(initSym); + } +} + void MarkLive::run() { // Add GC root symbols. if (!config->entry.empty()) @@ -75,31 +100,24 @@ if (Symbol *callDtors = WasmSym::callDtors) enqueue(callDtors); - // The ctor functions are all referenced by the synthetic callCtors - // function. However, this function does not contain relocations so we - // have to manually mark the ctors as live. - for (const ObjFile *obj : symtab->objectFiles) { - const WasmLinkingData &l = obj->getWasmObj()->linkingData(); - for (const WasmInitFunc &f : l.InitFunctions) { - auto *initSym = obj->getFunctionSymbol(f.Symbol); - if (!initSym->isDiscarded()) - enqueue(initSym); - } - } - // In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`. if (config->isPic) enqueue(WasmSym::applyRelocs); - // If we have any non-discarded init functions, mark `__wasm_call_ctors` as - // live so that we assign it an index and call it. - if (isCallCtorsLive()) - enqueue(WasmSym::callCtors); - if (config->sharedMemory && !config->shared) enqueue(WasmSym::initMemory); + // Enqueue constructors in objects explicitly live from the command-line. + for (const ObjFile *obj : symtab->objectFiles) + if (obj->isLive()) + enqueueInitFunctions(obj); + mark(); + + // Checked after mark() since this depends on which init functions are live. + // If so, mark `__wasm_call_ctors` live so it gets an index and is called. + if (isCallCtorsLive()) + WasmSym::callCtors->markLive(); } void MarkLive::mark() { @@ -181,9 +199,11 @@ // it can call them.
for (const ObjFile *file : symtab->objectFiles) { const WasmLinkingData &l = file->getWasmObj()->linkingData(); - for (const WasmInitFunc &f : l.InitFunctions) - if (!file->getFunctionSymbol(f.Symbol)->isDiscarded()) + for (const WasmInitFunc &f : l.InitFunctions) { + auto *sym = file->getFunctionSymbol(f.Symbol); + if (!sym->isDiscarded() && sym->isLive()) return true; + } } return false; diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -132,6 +132,8 @@ void Symbol::markLive() { assert(!isDiscarded()); + if (file) + file->markLive(); if (auto *g = dyn_cast<DefinedGlobal>(this)) g->global->live = true; if (auto *e = dyn_cast<DefinedEvent>(this)) diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -1104,9 +1104,8 @@ for (const WasmInitFunc &f : l.InitFunctions) { FunctionSymbol *sym = file->getFunctionSymbol(f.Symbol); - // comdat exclusions can cause init functions be discarded. - if (sym->isDiscarded()) + // Skip init functions discarded by comdat exclusion or not marked live. + if (sym->isDiscarded() || !sym->isLive()) continue; - assert(sym->isLive()); if (sym->signature->Params.size() != 0) error("constructor functions cannot take arguments: " + toString(*sym)); LLVM_DEBUG(dbgs() << "initFunctions: " << toString(*sym) << "\n");