Index: test/wasm/weak-symbols.ll =================================================================== --- test/wasm/weak-symbols.ll +++ test/wasm/weak-symbols.ll @@ -24,7 +24,7 @@ ; CHECK-NEXT: ReturnType: I32 ; CHECK-NEXT: ParamTypes: ; CHECK-NEXT: - Type: FUNCTION -; CHECK-NEXT: FunctionTypes: [ 0, 0, 0, 0, 0 ] +; CHECK-NEXT: FunctionTypes: [ 0, 0, 0, 0 ] ; CHECK-NEXT: - Type: TABLE ; CHECK-NEXT: Tables: ; CHECK-NEXT: - ElemType: ANYFUNC @@ -59,7 +59,7 @@ ; CHECK-NEXT: Index: 2 ; CHECK-NEXT: - Name: exportWeak2 ; CHECK-NEXT: Kind: FUNCTION -; CHECK-NEXT: Index: 4 +; CHECK-NEXT: Index: 3 ; CHECK-NEXT: - Type: ELEM ; CHECK-NEXT: Segments: ; CHECK-NEXT: - Offset: @@ -79,9 +79,6 @@ ; CHECK-NEXT: Body: 4181808080000B ; CHECK-NEXT: - Index: 3 ; CHECK-NEXT: Locals: -; CHECK-NEXT: Body: 41020B -; CHECK-NEXT: - Index: 4 -; CHECK-NEXT: Locals: ; CHECK-NEXT: Body: 4181808080000B ; CHECK-NEXT: - Type: DATA ; CHECK-NEXT: Segments: @@ -90,10 +87,10 @@ ; CHECK-NEXT: Offset: ; CHECK-NEXT: Opcode: I32_CONST ; CHECK-NEXT: Value: 1024 -; CHECK-NEXT: Content: '0100000002000000' +; CHECK-NEXT: Content: '01000000' ; CHECK-NEXT: - Type: CUSTOM ; CHECK-NEXT: Name: linking -; CHECK-NEXT: DataSize: 8 +; CHECK-NEXT: DataSize: 4 ; CHECK-NEXT: - Type: CUSTOM ; CHECK-NEXT: Name: name ; CHECK-NEXT: FunctionNames: @@ -101,6 +98,6 @@ ; CHECK-NEXT: Name: _start ; CHECK-NEXT: - Index: 2 ; CHECK-NEXT: Name: exportWeak1 -; CHECK-NEXT: - Index: 4 +; CHECK-NEXT: - Index: 3 ; CHECK-NEXT: Name: exportWeak2 ; CHECK-NEXT: ... 
Index: wasm/InputChunks.h
===================================================================
--- wasm/InputChunks.h
+++ wasm/InputChunks.h
@@ -22,6 +22,7 @@
 
 using llvm::object::WasmSegment;
 using llvm::wasm::WasmFunction;
+using llvm::wasm::WasmGlobal;
 using llvm::wasm::WasmRelocation;
 using llvm::wasm::WasmSignature;
 using llvm::object::WasmSection;
@@ -43,6 +44,7 @@
   virtual uint32_t getInputSectionOffset() const = 0;
 
   int32_t OutputOffset = 0;
+  bool Discarded = false;
   std::vector<WasmRelocation> Relocations;
   std::vector<OutputRelocation> OutRelocations;
   const ObjFile &File;
@@ -84,6 +86,12 @@
   uint32_t endVA() const { return startVA() + getSize(); }
   StringRef getName() const { return Segment.Data.Name; }
 
+  // Returns whether the InputSegment contains no data other than that
+  // referenced by the given global symbol. An InputSegment can contain (in
+  // theory) a mixture of static data and globals, although the frontend
+  // currently emits a single segment for each global.
+  bool isCoveredByGlobal(const WasmGlobal &Global) const;
+
 protected:
   const WasmSegment &Segment;
   const OutputSegment *OutputSeg = nullptr;
Index: wasm/InputChunks.cpp
===================================================================
--- wasm/InputChunks.cpp
+++ wasm/InputChunks.cpp
@@ -1,4 +1,4 @@
-//===- InputSegment.cpp ---------------------------------------------------===//
+//===- InputChunks.cpp ----------------------------------------------------===//
 //
 //                             The LLVM Linker
 //
@@ -15,6 +15,15 @@
 
 using namespace llvm;
 using namespace lld::wasm;
+using llvm::wasm::ValType;
+
+static uint32_t getValueTypeBytes(int32_t Type) {
+  switch (static_cast<ValType>(Type)) {
+  case ValType::I32: case ValType::F32: return 4;
+  case ValType::I64: case ValType::F64: return 8;
+  default: llvm_unreachable("unexpected type");
+  }
+}
 
 uint32_t InputSegment::translateVA(uint32_t Address) const {
   assert(Address >= startVA() && Address < endVA());
@@ -24,6 +33,12 @@
   return Address + Delta;
 }
 
+bool InputSegment::isCoveredByGlobal(const WasmGlobal &Global) const {
+  uint32_t GlobalAddr = Global.InitExpr.Value.Int32;
+  uint32_t GlobalSize = getValueTypeBytes(Global.Type);
+  return GlobalAddr == startVA() && GlobalAddr + GlobalSize == endVA();
+}
+
 void InputChunk::copyRelocations(const WasmSection &Section) {
   size_t Start = getInputSectionOffset();
   size_t Size = getSize();
Index: wasm/InputFiles.h
===================================================================
--- wasm/InputFiles.h
+++ wasm/InputFiles.h
@@ -116,6 +116,7 @@
                           const WasmSignature *Signature = nullptr);
   void initializeSymbols();
   InputSegment *getSegment(const WasmSymbol &WasmSym) const;
+  InputSegment *getSegment(uint32_t Address) const;
   const WasmSignature *getFunctionSig(const WasmSymbol &Sym) const;
   uint32_t getGlobalValue(const WasmSymbol &Sym) const;
   InputFunction *getFunction(const WasmSymbol &Sym) const;
Index: wasm/InputFiles.cpp
===================================================================
--- wasm/InputFiles.cpp
+++ wasm/InputFiles.cpp
@@ -14,6 +14,7 @@
 #include "SymbolTable.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Wasm.h"
 #include "llvm/Support/raw_ostream.h"
@@ -113,15 +114,20 @@
 // Return the InputSegment in which a given symbol is defined.
 InputSegment *ObjFile::getSegment(const WasmSymbol &WasmSym) const {
   uint32_t Address = WasmObj->getWasmSymbolValue(WasmSym);
-  for (InputSegment *Segment : Segments) {
-    if (Address >= Segment->startVA() && Address < Segment->endVA()) {
-      DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
-                   << Segment->getName() << "\n");
+  InputSegment *Segment = getSegment(Address);
+  if (!Segment)
+    fatal("symbol not found in any segment: " + WasmSym.Name);
+  DEBUG(dbgs() << "Found symbol in segment: " << WasmSym.Name << " -> "
+               << Segment->getName() << "\n");
+  return Segment;
+}
 
+// Return the InputSegment containing Address, or nullptr if there is none.
+InputSegment *ObjFile::getSegment(uint32_t Address) const {
+  for (InputSegment *Segment : Segments) {
+    if (Address >= Segment->startVA() && Address < Segment->endVA())
       return Segment;
-    }
   }
-  error("symbol not found in any segment: " + WasmSym.Name);
   return nullptr;
 }
 
@@ -161,8 +167,13 @@
     }
   }
 
-  FunctionSymbols.resize(NumFunctionImports + WasmObj->functions().size());
-  GlobalSymbols.resize(NumGlobalImports + WasmObj->globals().size());
+  ArrayRef<WasmFunction> Funcs = WasmObj->functions();
+  ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
+  ArrayRef<WasmSignature> Types = WasmObj->types();
+  ArrayRef<WasmGlobal> Globals = WasmObj->globals();
+
+  FunctionSymbols.resize(NumFunctionImports + Funcs.size());
+  GlobalSymbols.resize(NumGlobalImports + Globals.size());
 
   for (const WasmSegment &S : WasmObj->dataSegments()) {
     InputSegment *Seg = make<InputSegment>(S, *this);
@@ -170,9 +181,6 @@
     Segments.emplace_back(Seg);
   }
 
-  ArrayRef<WasmFunction> Funcs = WasmObj->functions();
-  ArrayRef<uint32_t> FuncTypes = WasmObj->functionTypes();
-  ArrayRef<WasmSignature> Types = WasmObj->types();
   for (size_t I = 0; I < Funcs.size(); ++I) {
     const WasmFunction &Func = Funcs[I];
     const WasmSignature &Sig = Types[FuncTypes[I]];
@@ -183,6 +191,8 @@
 
   // Populate `FunctionSymbols` and `GlobalSymbols` based on the WasmSymbols
   // in the object
+  SmallBitVector ReachedFunctions(Funcs.size(), false);
+  SmallBitVector ReachedGlobals(Globals.size(), false);
   for (const SymbolRef &Sym : WasmObj->symbols()) {
     const WasmSymbol &WasmSym = WasmObj->getWasmSymbol(Sym.getRawDataRefImpl());
     Symbol *S;
@@ -197,10 +207,14 @@
     case WasmSymbol::SymbolType::GLOBAL_EXPORT:
       S = createDefined(WasmSym, Symbol::Kind::DefinedGlobalKind,
                         getSegment(WasmSym), nullptr, getGlobalValue(WasmSym));
+      if (S->getFile() == this)
+        ReachedGlobals[WasmSym.ElementIndex - NumGlobalImports] = true;
       break;
     case WasmSymbol::SymbolType::FUNCTION_EXPORT:
       S = createDefined(WasmSym, Symbol::Kind::DefinedFunctionKind, nullptr,
                         getFunction(WasmSym));
+      if (S->getFile() == this)
+        ReachedFunctions[WasmSym.ElementIndex - NumFunctionImports] = true;
       break;
     case WasmSymbol::SymbolType::DEBUG_FUNCTION_NAME:
       // These are for debugging only, no need to create linker symbols for them
@@ -228,6 +242,27 @@
         for (size_t I = 0; I < GlobalSymbols.size(); ++I)
             assert(GlobalSymbols[I] != nullptr););
 
+  // This is a crude precursor to garbage collection, but it works really nicely
+  // for discarding multiple copies of inline (=Comdat/weak) symbols. If the
+  // function isn't reachable by any Symbol (because another file has defined
+  // the weak symbol), then it's discardable. We aren't considering here
+  // whether the Symbol is actually ever called.
+  for (uint32_t I = 0; I < Funcs.size(); ++I) {
+    if (!ReachedFunctions[I])
+      Functions[I]->Discarded = true;
+  }
+  for (uint32_t I = 0; I < Globals.size(); ++I) {
+    // TODO It's crazy that we have to do a linear search through the segments
+    // for each global! We should *somehow* associate each global with its
+    // segment (and maybe each segment with its globals). NB. Using the
+    // GlobalSym wouldn't work, we want the segment from *this file* not the
+    // one stored on the Symbol, which could come from another file.
+    InputSegment *Seg = getSegment(Globals[I].InitExpr.Value.Int32);
+    // getSegment() returns nullptr when no segment contains the address.
+    if (!ReachedGlobals[I] && Seg && Seg->isCoveredByGlobal(Globals[I]))
+      Seg->Discarded = true;
+  }
+
   // Populate `TableSymbols` with all symbols that are called indirectly
   uint32_t SegmentCount = WasmObj->elements().size();
   if (SegmentCount) {
Index: wasm/Writer.cpp
===================================================================
--- wasm/Writer.cpp
+++ wasm/Writer.cpp
@@ -618,6 +618,8 @@
   for (ObjFile *File : Symtab->ObjectFiles) {
     DEBUG(dbgs() << "Functions: " << File->getName() << "\n");
     for (InputFunction *Func : File->Functions) {
+      if (Func->Discarded)
+        continue;
       DefinedFunctions.emplace_back(Func);
       Func->setOutputIndex(FunctionIndex++);
     }
@@ -653,6 +655,8 @@
 void Writer::createOutputSegments() {
   for (ObjFile *File : Symtab->ObjectFiles) {
     for (InputSegment *Segment : File->Segments) {
+      if (Segment->Discarded)
+        continue;
       StringRef Name = getOutputDataSegmentName(Segment->getName());
       OutputSegment *&S = SegmentMap[Name];
       if (S == nullptr) {