Index: test/wasm/call-indirect.ll =================================================================== --- test/wasm/call-indirect.ll +++ test/wasm/call-indirect.ll @@ -1,6 +1,6 @@ ; RUN: llc -filetype=obj %p/Inputs/call-indirect.ll -o %t2.o ; RUN: llc -filetype=obj %s -o %t.o -; RUN: lld -flavor wasm -o %t.wasm %t2.o %t.o +; RUN: lld -flavor wasm -no-gc-sections -o %t.wasm %t2.o %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s ; bitcode generated from the following C code: Index: test/wasm/comdats.ll =================================================================== --- test/wasm/comdats.ll +++ test/wasm/comdats.ll @@ -1,7 +1,7 @@ ; RUN: llc -filetype=obj -mtriple=wasm32-unknown-uknown-wasm %p/Inputs/comdat1.ll -o %t1.o ; RUN: llc -filetype=obj -mtriple=wasm32-unknown-uknown-wasm %p/Inputs/comdat2.ll -o %t2.o ; RUN: llc -filetype=obj -mtriple=wasm32-unknown-uknown-wasm %s -o %t.o -; RUN: lld -flavor wasm -o %t.wasm %t.o %t1.o %t2.o +; RUN: lld -flavor wasm -no-gc-sections -o %t.wasm %t.o %t1.o %t2.o ; RUN: obj2yaml %t.wasm | FileCheck %s target triple = "wasm32-unknown-unknown-wasm" Index: test/wasm/data-layout.ll =================================================================== --- test/wasm/data-layout.ll +++ test/wasm/data-layout.ll @@ -9,7 +9,7 @@ @hello_str = external global i8* @external_ref = global i8** @hello_str, align 8 -; RUN: lld -flavor wasm --allow-undefined -o %t.wasm %t.o %t.hello.o +; RUN: lld -flavor wasm -no-gc-sections --allow-undefined -o %t.wasm %t.o %t.hello.o ; RUN: obj2yaml %t.wasm | FileCheck %s ; CHECK: - Type: GLOBAL Index: test/wasm/entry.ll =================================================================== --- test/wasm/entry.ll +++ test/wasm/entry.ll @@ -33,4 +33,4 @@ ; CHECK-CTOR-NEXT: Index: 0 ; CHECK-CTOR-NEXT: - Name: __wasm_call_ctors ; CHECK-CTOR-NEXT: Kind: FUNCTION -; CHECK-CTOR-NEXT: Index: 1 +; CHECK-CTOR-NEXT: Index: 0 Index: test/wasm/gc-sections.ll =================================================================== --- 
/dev/null +++ test/wasm/gc-sections.ll @@ -0,0 +1,80 @@ +; RUN: llc -filetype=obj %s -o %t.o + +; RUN: lld -flavor wasm -print-gc-sections -o %t1.wasm %t.o | FileCheck %s -check-prefix=PRINT-GC +; PRINT-GC: removing unused section 'unused_function' in file '{{.*}}' +; PRINT-GC-NOT: removing unused section 'used_function' in file '{{.*}}' +; PRINT-GC: removing unused section '.data.unused_data' in file '{{.*}}' +; PRINT-GC-NOT: removing unused section '.data.used_data' in file '{{.*}}' + +; RUN: obj2yaml %t1.wasm | FileCheck %s + +; RUN: lld -flavor wasm -print-gc-sections --no-gc-sections -o %t1.no-gc.wasm %t.o + +; RUN: obj2yaml %t1.no-gc.wasm | FileCheck %s -check-prefix=NO-GC + +target triple = "wasm32-unknown-unknown-wasm" + +@unused_data = hidden global i32 1, align 4 +@used_data = hidden global i32 2, align 4 + +define hidden i32 @unused_function() { + %1 = load i32, i32* @unused_data, align 4 + ret i32 %1 +} + +define hidden i32 @used_function() { + %1 = load i32, i32* @used_data, align 4 + ret i32 %1 +} + +define hidden void @_start() { +entry: + call i32 @used_function() + ret void +} + +; CHECK: - Type: DATA +; CHECK-NEXT: Segments: +; CHECK-NEXT: - SectionOffset: 7 +; CHECK-NEXT: MemoryIndex: 0 +; CHECK-NEXT: Offset: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 1024 +; CHECK-NEXT: Content: '02000000' +; CHECK-NEXT: - Type: CUSTOM +; CHECK-NEXT: Name: linking +; CHECK-NEXT: DataSize: 4 +; CHECK-NEXT: - Type: CUSTOM +; CHECK-NEXT: Name: name +; CHECK-NEXT: FunctionNames: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Name: used_function +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Name: _start +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Name: __wasm_call_ctors +; CHECK-NEXT: ... 
+ +; NO-GC: - Type: DATA +; NO-GC-NEXT: Segments: +; NO-GC-NEXT: - SectionOffset: 7 +; NO-GC-NEXT: MemoryIndex: 0 +; NO-GC-NEXT: Offset: +; NO-GC-NEXT: Opcode: I32_CONST +; NO-GC-NEXT: Value: 1024 +; NO-GC-NEXT: Content: '0100000002000000' +; NO-GC-NEXT: - Type: CUSTOM +; NO-GC-NEXT: Name: linking +; NO-GC-NEXT: DataSize: 8 +; NO-GC-NEXT: - Type: CUSTOM +; NO-GC-NEXT: Name: name +; NO-GC-NEXT: FunctionNames: +; NO-GC-NEXT: - Index: 0 +; NO-GC-NEXT: Name: unused_function +; NO-GC-NEXT: - Index: 1 +; NO-GC-NEXT: Name: used_function +; NO-GC-NEXT: - Index: 2 +; NO-GC-NEXT: Name: _start +; NO-GC-NEXT: - Index: 3 +; NO-GC-NEXT: Name: __wasm_call_ctors +; NO-GC-NEXT: ... Index: test/wasm/local-symbols.ll =================================================================== --- test/wasm/local-symbols.ll +++ test/wasm/local-symbols.ll @@ -9,11 +9,13 @@ define internal i32 @baz() local_unnamed_addr { entry: - ret i32 2 + %0 = load i32, i32* @bar, align 4 + ret i32 %0 } define i32 @_start() local_unnamed_addr { entry: + call i32 @baz() ret i32 1 } @@ -70,10 +72,10 @@ ; CHECK-NEXT: Functions: ; CHECK-NEXT: - Index: 0 ; CHECK-NEXT: Locals: -; CHECK-NEXT: Body: 41020B +; CHECK-NEXT: Body: 4100280284888080000B ; CHECK-NEXT: - Index: 1 ; CHECK-NEXT: Locals: -; CHECK-NEXT: Body: 41010B +; CHECK-NEXT: Body: 1080808080001A41010B ; CHECK-NEXT: - Index: 2 ; CHECK-NEXT: Locals: ; CHECK-NEXT: Body: 0B Index: test/wasm/weak-symbols.ll =================================================================== --- test/wasm/weak-symbols.ll +++ test/wasm/weak-symbols.ll @@ -1,7 +1,7 @@ ; RUN: llc -filetype=obj %p/Inputs/weak-symbol1.ll -o %t1.o ; RUN: llc -filetype=obj %p/Inputs/weak-symbol2.ll -o %t2.o ; RUN: llc -filetype=obj %s -o %t.o -; RUN: lld -flavor wasm -o %t.wasm %t.o %t1.o %t2.o +; RUN: lld -flavor wasm -no-gc-sections -o %t.wasm %t.o %t1.o %t2.o ; RUN: obj2yaml %t.wasm | FileCheck %s target triple = "wasm32-unknown-unknown-wasm" Index: wasm/Config.h 
===================================================================
--- wasm/Config.h
+++ wasm/Config.h
@@ -23,7 +23,9 @@
   bool AllowUndefined;
   bool CheckSignatures;
   bool Demangle;
+  bool GcSections;
   bool ImportMemory;
+  bool PrintGcSections;
   bool Relocatable;
   bool StripAll;
   bool StripDebug;
Index: wasm/Driver.cpp
===================================================================
--- wasm/Driver.cpp
+++ wasm/Driver.cpp
@@ -11,6 +11,7 @@
 #include "Config.h"
 #include "SymbolTable.h"
 #include "Writer.h"
+#include "InputChunks.h"
 #include "lld/Common/Args.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
@@ -23,6 +24,8 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 
+#define DEBUG_TYPE "lld"
+
 using namespace llvm;
 using namespace llvm::sys;
 using namespace llvm::wasm;
@@ -73,6 +76,7 @@
 
   Config = make<Configuration>();
   Symtab = make<SymbolTable>();
+  InputChunks.clear();
 
   LinkerDriver().link(Args);
 
@@ -222,6 +226,94 @@
   return Arg->getValue();
 }
 
+// Invokes Fn on the chunk defining each symbol that Chunk's relocations
+// reference. Relocation types that do not refer to a function or global
+// symbol (for example, type index relocations) are skipped.
+static void forEachSuccessor(InputChunk &Chunk,
+                             std::function<void(InputChunk *)> Fn) {
+  DEBUG(dbgs() << "forEachSuccessor: " << Chunk.getName() << "\n");
+  for (const WasmRelocation &Reloc : Chunk.getRelocations()) {
+    Symbol *Sym = nullptr;
+    switch (Reloc.Type) {
+    case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+    case R_WEBASSEMBLY_TABLE_INDEX_I32:
+    case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+      Sym = Chunk.File->getFunctionSymbol(Reloc.Index);
+      break;
+    case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+    case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+    case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+    case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+      Sym = Chunk.File->getGlobalSymbol(Reloc.Index);
+      break;
+    default:
+      // No symbol behind this relocation; keep scanning the remaining ones.
+      continue;
+    }
+    Fn(Sym->getChunk());
+  }
+}
+
+// Implements --gc-sections: removes chunks that are not reachable from the
+// GC roots, using a mark-sweep scheme.
+//
+// Each InputChunk has a "Live" bit, which is off by default when GC is
+// enabled. Starting from the GC roots, every reachable chunk is visited and
+// its Live bit is set. The Writer then skips chunks whose Live bit is off.
+static void markLive() {
+  DEBUG(dbgs() << "markLive\n");
+
+  if (!Config->GcSections)
+    return;
+
+  SmallVector<InputChunk *, 256> Q;
+
+  auto Enqueue = [&](InputChunk *Chunk) {
+    if (!Chunk || Chunk->Live)
+      return;
+    DEBUG(dbgs() << "Enqueue: " << Chunk->getName() << "\n");
+    Chunk->Live = true;
+    Q.push_back(Chunk);
+  };
+
+  auto MarkSymbol = [&](Symbol *Sym) {
+    DEBUG(dbgs() << "MarkSymbol: " << Sym << "\n");
+    if (Sym && Sym->isDefined()) {
+      DEBUG(dbgs() << "MarkSymbol: " << Sym->getName() << "\n");
+      Enqueue(Sym->getChunk());
+    }
+  };
+
+  // Add GC root symbols.
+  if (!Config->Entry.empty())
+    MarkSymbol(Symtab->find(Config->Entry));
+  MarkSymbol(Config->CtorSymbol);
+
+  // By default we export all non-hidden symbols, so they are GC roots too.
+  for (Symbol *Sym : Symtab->getSymbols())
+    if (!Sym->isHidden())
+      MarkSymbol(Sym);
+
+  // The ctor functions are all used by the synthetic __wasm_call_ctors
+  // function, but since that function is created in-place it doesn't contain
+  // relocations, which means we have to mark the ctors manually.
+  for (ObjFile *File : Symtab->ObjectFiles) {
+    const WasmLinkingData &L = File->getWasmObj()->linkingData();
+    for (const WasmInitFunc &F : L.InitFunctions)
+      Enqueue(File->getFunctionSymbol(F.FunctionIndex)->getChunk());
+  }
+
+  while (!Q.empty())
+    forEachSuccessor(*Q.pop_back_val(), Enqueue);
+
+  // Report garbage-collected sections.
+  if (Config->PrintGcSections)
+    for (InputChunk *Sec : InputChunks)
+      if (!Sec->Live)
+        message("removing unused section '" + Sec->getName() + "' in file '" +
+                Sec->getFileName() + "'");
+}
+
 void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   WasmOptTable Parser;
   opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
@@ -250,9 +342,14 @@
   Config->CheckSignatures =
       Args.hasFlag(OPT_check_signatures, OPT_no_check_signatures, false);
   Config->Entry = getEntry(Args, Args.hasArg(OPT_relocatable) ? "" : "_start");
+  errorHandler().FatalWarnings =
+      Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
   Config->ImportMemory = Args.hasArg(OPT_import_memory);
   Config->OutputFile = Args.getLastArgValue(OPT_o);
   Config->Relocatable = Args.hasArg(OPT_relocatable);
+  Config->GcSections = Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, !Config->Relocatable);
+  Config->PrintGcSections =
+      Args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
   Config->SearchPaths = args::getStrings(Args, OPT_L);
   Config->StripAll = Args.hasArg(OPT_strip_all);
   Config->StripDebug = Args.hasArg(OPT_strip_debug);
@@ -274,13 +371,15 @@
   if (!Args.hasArg(OPT_INPUT))
     error("no input files");
 
-  if (Config->Relocatable && !Config->Entry.empty())
-    error("entry point specified for relocatable output file");
-  if (Config->Relocatable && Args.hasArg(OPT_undefined))
-    error("undefined symbols specified for relocatable output file");
-
   Symbol *EntrySym = nullptr;
-  if (!Config->Relocatable) {
+  if (Config->Relocatable) {
+    if (!Config->Entry.empty())
+      error("entry point specified for relocatable output file");
+    if (Config->GcSections)
+      error("-r and --gc-sections may not be used together");
+    if (Args.hasArg(OPT_undefined))
+      error("-r and --undefined may not be used together");
+  } else {
     static WasmSignature Signature = {{}, WASM_TYPE_NORESULT};
     if (!Config->Entry.empty())
       EntrySym = Symtab->addUndefinedFunction(Config->Entry, &Signature);
@@ -310,9 +409,17 @@
   // Add all files to the
symbol table. This will add almost all // symbols that we need to the symbol table. - for (InputFile *F : Files) + for (InputFile *F : Files) { Symtab->addFile(F); + if (auto *Obj = dyn_cast(F)) { + for (InputChunk *C : Obj->Functions) + InputChunks.push_back(C); + for (InputChunk *C : Obj->Segments) + InputChunks.push_back(C); + } + } + // Make sure we have resolved all symbols. if (!Config->Relocatable && !Config->AllowUndefined) { Symtab->reportRemainingUndefines(); @@ -345,6 +452,9 @@ if (errorCount()) return; + // Do size optimizations: garbage collection + markLive(); + // Write the result to the file. writeResult(); } Index: wasm/InputChunks.h =================================================================== --- wasm/InputChunks.h +++ wasm/InputChunks.h @@ -58,9 +58,14 @@ bool Discarded = false; std::vector OutRelocations; + const ObjFile *File; + + // The garbage collector sets sections' Live bits. + // If GC is disabled, all sections are considered live by default. + unsigned Live : 1; protected: - InputChunk(const ObjFile *F, Kind K) : File(F), SectionKind(K) {} + InputChunk(const ObjFile *F, Kind K) : File(F), Live(!Config->GcSections), SectionKind(K) {} virtual ~InputChunk() = default; void calcRelocations(); virtual ArrayRef data() const = 0; @@ -68,7 +73,6 @@ std::vector Relocations; int32_t OutputOffset = 0; - const ObjFile *File; Kind SectionKind; }; @@ -168,6 +172,8 @@ ArrayRef Body; }; +extern std::vector InputChunks; + } // namespace wasm } // namespace lld Index: wasm/InputChunks.cpp =================================================================== --- wasm/InputChunks.cpp +++ wasm/InputChunks.cpp @@ -21,6 +21,8 @@ using namespace lld; using namespace lld::wasm; +std::vector lld::wasm::InputChunks; + uint32_t InputSegment::translateVA(uint32_t Address) const { assert(Address >= startVA() && Address < endVA()); int32_t Delta = OutputSeg->StartVA + OutputSegmentOffset - startVA(); Index: wasm/InputFiles.h 
=================================================================== --- wasm/InputFiles.h +++ wasm/InputFiles.h @@ -110,6 +110,10 @@ return FunctionSymbols[Index]; } + Symbol *getGlobalSymbol(uint32_t Index) const { + return GlobalSymbols[Index]; + } + private: uint32_t relocateVirtualAddress(uint32_t Index) const; uint32_t relocateTypeIndex(uint32_t Original) const; Index: wasm/InputFiles.cpp =================================================================== --- wasm/InputFiles.cpp +++ wasm/InputFiles.cpp @@ -51,11 +51,11 @@ } uint32_t ObjFile::relocateVirtualAddress(uint32_t GlobalIndex) const { - return GlobalSymbols[GlobalIndex]->getVirtualAddress(); + return getGlobalSymbol(GlobalIndex)->getVirtualAddress(); } uint32_t ObjFile::relocateFunctionIndex(uint32_t Original) const { - Symbol *Sym = FunctionSymbols[Original]; + const Symbol *Sym = getFunctionSymbol(Original); uint32_t Index = Sym->getOutputIndex(); DEBUG(dbgs() << "relocateFunctionIndex: " << toString(*Sym) << ": " << Original << " -> " << Index << "\n"); @@ -67,7 +67,7 @@ } uint32_t ObjFile::relocateTableIndex(uint32_t Original) const { - Symbol *Sym = FunctionSymbols[Original]; + const Symbol *Sym = getFunctionSymbol(Original); uint32_t Index = Sym->hasTableIndex() ? Sym->getTableIndex() : 0; DEBUG(dbgs() << "relocateTableIndex: " << toString(*Sym) << ": " << Original << " -> " << Index << "\n"); @@ -75,7 +75,7 @@ } uint32_t ObjFile::relocateGlobalIndex(uint32_t Original) const { - Symbol *Sym = GlobalSymbols[Original]; + const Symbol *Sym = getGlobalSymbol(Original); uint32_t Index = Sym->hasOutputIndex() ? 
Sym->getOutputIndex() : 0; DEBUG(dbgs() << "relocateGlobalIndex: " << toString(*Sym) << ": " << Original << " -> " << Index << "\n"); Index: wasm/Options.td =================================================================== --- wasm/Options.td +++ wasm/Options.td @@ -27,6 +27,9 @@ def fatal_warnings: F<"fatal-warnings">, HelpText<"Treat warnings as errors">; +def gc_sections: F<"gc-sections">, + HelpText<"Enable garbage collection of unused sections">; + def help: F<"help">, HelpText<"Print option help">; def l: JoinedOrSeparate<["-"], "l">, MetaVarName<"">, @@ -45,9 +48,18 @@ def no_fatal_warnings: F<"no-fatal-warnings">; +def no_gc_sections: F<"no-gc-sections">, + HelpText<"Disable garbage collection of unused sections">; + +def no_print_gc_sections: F<"no-print-gc-sections">, + HelpText<"Do not list removed unused sections">; + def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"">, HelpText<"Path to file to write output">; +def print_gc_sections: F<"print-gc-sections">, + HelpText<"List removed unused sections">; + def relocatable: F<"relocatable">, HelpText<"Create relocatable object file">; def strip_all: F<"strip-all">, HelpText<"Strip all symbols">; Index: wasm/Symbols.h =================================================================== --- wasm/Symbols.h +++ wasm/Symbols.h @@ -13,6 +13,7 @@ #include "lld/Common/LLVM.h" #include "llvm/Object/Archive.h" #include "llvm/Object/Wasm.h" +#include "Config.h" using llvm::object::Archive; using llvm::wasm::WasmSignature; @@ -37,7 +38,8 @@ InvalidKind, }; - Symbol(StringRef Name, uint32_t Flags) : Flags(Flags), Name(Name) {} + Symbol(StringRef Name, uint32_t Flags) : + Used(!Config->GcSections), Flags(Flags), Name(Name) {} Kind getKind() const { return SymbolKind; } @@ -97,7 +99,11 @@ void setArchiveSymbol(const Archive::Symbol &Sym) { ArchiveSymbol = Sym; } const Archive::Symbol &getArchiveSymbol() { return ArchiveSymbol; } + // True if a symbol is used from a live section. 
+ unsigned Used : 1; + protected: + uint32_t Flags; uint32_t VirtualAddress = 0; Index: wasm/Writer.cpp =================================================================== --- wasm/Writer.cpp +++ wasm/Writer.cpp @@ -646,6 +646,8 @@ for (Symbol *Sym : File->getSymbols()) { if (!Sym->isDefined() || File != Sym->getFile()) continue; + if (!Sym->getChunk()->Live) + continue; if (Sym->isGlobal()) continue; if (Sym->getChunk()->Discarded) @@ -735,7 +737,7 @@ for (ObjFile *File : Symtab->ObjectFiles) { DEBUG(dbgs() << "Functions: " << File->getName() << "\n"); for (InputFunction *Func : File->Functions) { - if (Func->Discarded) + if (Func->Discarded || !Func->Live) continue; DefinedFunctions.emplace_back(Func); Func->setOutputIndex(FunctionIndex++); @@ -784,7 +786,7 @@ void Writer::createOutputSegments() { for (ObjFile *File : Symtab->ObjectFiles) { for (InputSegment *Segment : File->Segments) { - if (Segment->Discarded) + if (Segment->Discarded || !Segment->Live) continue; StringRef Name = getOutputDataSegmentName(Segment->getName()); OutputSegment *&S = SegmentMap[Name]; @@ -842,9 +844,11 @@ for (ObjFile *File : Symtab->ObjectFiles) { const WasmLinkingData &L = File->getWasmObj()->linkingData(); InitFunctions.reserve(InitFunctions.size() + L.InitFunctions.size()); - for (const WasmInitFunc &F : L.InitFunctions) + for (const WasmInitFunc &F : L.InitFunctions) { InitFunctions.emplace_back(WasmInitFunc{ F.Priority, File->relocateFunctionIndex(F.FunctionIndex)}); + File->getFunctionSymbol(F.FunctionIndex)->getChunk()->Live = true; + } } // Sort in order of priority (lowest first) so that they are called // in the correct order.