Index: lld/trunk/test/wasm/compress-relocs.ll =================================================================== --- lld/trunk/test/wasm/compress-relocs.ll +++ lld/trunk/test/wasm/compress-relocs.ll @@ -0,0 +1,22 @@ +; RUN: llc -filetype=obj %p/Inputs/call-indirect.ll -o %t2.o +; RUN: llc -filetype=obj %s -o %t.o +; RUN: wasm-ld -o %t.wasm %t2.o %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +; RUN: wasm-ld -O2 -o %t-compressed.wasm %t2.o %t.o +; RUN: obj2yaml %t-compressed.wasm | FileCheck %s -check-prefix=COMPRESS + +target triple = "wasm32-unknown-unknown-wasm" + +define i32 @foo() { +entry: + ret i32 2 +} + +define void @_start() local_unnamed_addr { +entry: + ret void +} + +; CHECK: Body: 4100280284888080002100410028028088808000118080808000001A2000118180808000001A0B +; COMPRESS: Body: 41002802840821004100280280081100001A20001101001A0B Index: lld/trunk/wasm/Config.h =================================================================== --- lld/trunk/wasm/Config.h +++ lld/trunk/wasm/Config.h @@ -19,6 +19,7 @@ struct Configuration { bool AllowUndefined; + bool CompressRelocTargets; bool Demangle; bool ExportTable; bool GcSections; @@ -33,6 +34,7 @@ uint32_t GlobalBase; uint32_t InitialMemory; uint32_t MaxMemory; + uint32_t Optimize; uint32_t ZStackSize; llvm::StringRef Entry; llvm::StringRef OutputFile; Index: lld/trunk/wasm/Driver.cpp =================================================================== --- lld/trunk/wasm/Driver.cpp +++ lld/trunk/wasm/Driver.cpp @@ -290,6 +290,7 @@ Args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); Config->ImportMemory = Args.hasArg(OPT_import_memory); Config->ImportTable = Args.hasArg(OPT_import_table); + Config->Optimize = args::getInteger(Args, OPT_O, 0); Config->OutputFile = Args.getLastArgValue(OPT_o); Config->Relocatable = Args.hasArg(OPT_relocatable); Config->GcSections = @@ -312,6 +313,8 @@ Config->ZStackSize = args::getZOptionValue(Args, OPT_z, "stack-size", WasmPageSize); + Config->CompressRelocTargets = 
Config->Optimize > 0 && !Config->Relocatable; + if (auto *Arg = Args.getLastArg(OPT_allow_undefined_file)) readImportFile(Arg->getValue()); Index: lld/trunk/wasm/InputChunks.h =================================================================== --- lld/trunk/wasm/InputChunks.h +++ lld/trunk/wasm/InputChunks.h @@ -48,11 +48,11 @@ Kind kind() const { return SectionKind; } - uint32_t getSize() const { return data().size(); } + virtual uint32_t getSize() const { return data().size(); } void copyRelocations(const WasmSection &Section); - void writeTo(uint8_t *SectionStart) const; + virtual void writeTo(uint8_t *SectionStart) const; ArrayRef getRelocations() const { return Relocations; } @@ -78,6 +78,7 @@ virtual ~InputChunk() = default; virtual ArrayRef data() const = 0; virtual uint32_t getInputSectionOffset() const = 0; + virtual uint32_t getInputSize() const { return getSize(); }; // Verifies the existing data at relocation targets matches our expectations. // This is performed only debug builds as an extra sanity check. 
@@ -131,11 +132,19 @@ C->kind() == InputChunk::SyntheticFunction; } + void writeTo(uint8_t *SectionStart) const override; StringRef getName() const override { return Function->SymbolName; } StringRef getDebugName() const override { return Function->DebugName; } uint32_t getComdat() const override { return Function->Comdat; } uint32_t getFunctionInputOffset() const { return getInputSectionOffset(); } uint32_t getFunctionCodeOffset() const { return Function->CodeOffset; } + uint32_t getSize() const override { + if (Config->CompressRelocTargets && File) { + assert(CompressedSize); + return CompressedSize; + } + return data().size(); + } uint32_t getFunctionIndex() const { return FunctionIndex.getValue(); } bool hasFunctionIndex() const { return FunctionIndex.hasValue(); } void setFunctionIndex(uint32_t Index); @@ -143,13 +152,23 @@ bool hasTableIndex() const { return TableIndex.hasValue(); } void setTableIndex(uint32_t Index); + // The size of a given input function can depend on the values of the + // LEB relocations within it. This calculateSize method is called after + // all the symbol values have been calculated but before getSize() is ever + // called.
+ void calculateSize(); + const WasmSignature &Signature; protected: ArrayRef data() const override { + assert(!Config->CompressRelocTargets); return File->CodeSection->Content.slice(getInputSectionOffset(), Function->Size); } + + uint32_t getInputSize() const override { return Function->Size; } + uint32_t getInputSectionOffset() const override { return Function->CodeSectionOffset; } @@ -157,6 +176,8 @@ const WasmFunction *Function; llvm::Optional FunctionIndex; llvm::Optional TableIndex; + uint32_t CompressedFuncSize = 0; + uint32_t CompressedSize = 0; }; class SyntheticFunction : public InputFunction { Index: lld/trunk/wasm/InputChunks.cpp =================================================================== --- lld/trunk/wasm/InputChunks.cpp +++ lld/trunk/wasm/InputChunks.cpp @@ -47,7 +47,7 @@ if (Section.Relocations.empty()) return; size_t Start = getInputSectionOffset(); - size_t Size = getSize(); + size_t Size = getInputSize(); for (const WasmRelocation &R : Section.Relocations) if (R.Offset >= Start && R.Offset < Start + Size) Relocations.push_back(R); @@ -179,3 +179,123 @@ assert(!hasTableIndex()); TableIndex = Index; } + +// Write a relocation value without padding and return the number of bytes +// written.
+static unsigned writeCompressedReloc(uint8_t *Buf, const WasmRelocation &Rel,
+                                     uint32_t Value) {
+  switch (Rel.Type) {
+  case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+  case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+  case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+    // No padding is requested, so only the bytes the value needs are emitted.
+    return encodeULEB128(Value, Buf);
+  case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+    // Reinterpret the 32-bit value as signed before handing it to the
+    // signed encoder.
+    return encodeSLEB128(static_cast<int32_t>(Value), Buf);
+  case R_WEBASSEMBLY_TABLE_INDEX_I32:
+  case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+    // Fixed-width targets cannot shrink, but the value must still be stored:
+    // callers advance their output pointer by the width returned here.
+    // Stored as a little-endian 32-bit integer.
+    Buf[0] = static_cast<uint8_t>(Value);
+    Buf[1] = static_cast<uint8_t>(Value >> 8);
+    Buf[2] = static_cast<uint8_t>(Value >> 16);
+    Buf[3] = static_cast<uint8_t>(Value >> 24);
+    return 4;
+  default:
+    llvm_unreachable("unknown relocation type");
+  }
+}
+
+// Return the number of bytes a relocation target occupies in the input
+// object: 5 for the (padded) LEB/SLEB kinds, 4 for the fixed-width I32 kinds.
+static unsigned getRelocWidthPadded(const WasmRelocation &Rel) {
+  switch (Rel.Type) {
+  case R_WEBASSEMBLY_TYPE_INDEX_LEB:
+  case R_WEBASSEMBLY_FUNCTION_INDEX_LEB:
+  case R_WEBASSEMBLY_GLOBAL_INDEX_LEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
+  case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+  case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
+    return 5;
+  case R_WEBASSEMBLY_TABLE_INDEX_I32:
+  case R_WEBASSEMBLY_MEMORY_ADDR_I32:
+    return 4;
+  default:
+    llvm_unreachable("unknown relocation type");
+  }
+}
+
+// Return the number of bytes needed to encode Value for this relocation
+// without padding. The value is encoded into a scratch buffer that is
+// discarded; only the resulting length is of interest.
+static unsigned getRelocWidth(const WasmRelocation &Rel, uint32_t Value) {
+  uint8_t Buf[5];
+  return writeCompressedReloc(Buf, Rel, Value);
+}
+
+// Relocations of type LEB and SLEB in the code section are padded to 5 bytes
+// so that a fast linker can blindly overwrite them without needing to worry
+// about the number of bytes needed to encode the values.
+// However, for optimal output the code section can be compressed to remove
+// the padding when outputting non-relocatable files.
+// In this case we need to perform a size calculation based on the value at each
+// relocation. At best we end up saving 4 bytes for each relocation entry.
+//
+// This function only computes the final output size. It must be called
+// before getSize() is used to calculate the layout of the code section.
+void InputFunction::calculateSize() {
+  // Synthetic functions (no File) and uncompressed links keep their input
+  // size, so there is nothing to compute.
+  if (!File || !Config->CompressRelocTargets)
+    return;
+
+  DEBUG(dbgs() << "calculateSize: " << getName() << "\n");
+
+  // Start from a clean slate so that calling this more than once does not
+  // accumulate on top of a previous result.
+  CompressedFuncSize = 0;
+
+  const uint8_t *SecStart = File->CodeSection->Content.data();
+  const uint8_t *FuncStart = SecStart + getInputSectionOffset();
+  // Number of bytes used by the input function's LEB-encoded size header.
+  // The header is re-encoded below, so it is excluded from the body size.
+  uint32_t FunctionSizeLength;
+  decodeULEB128(FuncStart, &FunctionSizeLength);
+
+  uint32_t Start = getInputSectionOffset();
+  uint32_t End = Start + Function->Size;
+
+  // Walk the relocations, summing the untouched bytes between relocation
+  // targets plus the unpadded width of each target's new value.
+  uint32_t LastRelocEnd = Start + FunctionSizeLength;
+  for (const WasmRelocation &Rel : Relocations) {
+    DEBUG(dbgs() << " region: " << (Rel.Offset - LastRelocEnd) << "\n");
+    CompressedFuncSize += Rel.Offset - LastRelocEnd;
+    CompressedFuncSize += getRelocWidth(Rel, File->calcNewValue(Rel));
+    LastRelocEnd = Rel.Offset + getRelocWidthPadded(Rel);
+  }
+  DEBUG(dbgs() << " final region: " << (End - LastRelocEnd) << "\n");
+  CompressedFuncSize += End - LastRelocEnd;
+
+  // Now we know how long the resulting function is we can add the encoding
+  // of its length
+  uint8_t Buf[5];
+  CompressedSize = CompressedFuncSize + encodeULEB128(CompressedFuncSize, Buf);
+
+  DEBUG(dbgs() << " calculateSize orig: " << Function->Size << "\n");
+  DEBUG(dbgs() << " calculateSize new: " << CompressedSize << "\n");
+}
+
+// Override the default writeTo method so that we can (optionally) write the
+// compressed version of the function.
+void InputFunction::writeTo(uint8_t *Buf) const {
+  // Synthetic functions (no File) and uncompressed links take the generic
+  // copy-and-relocate path.
+  if (!File || !Config->CompressRelocTargets)
+    return InputChunk::writeTo(Buf);
+
+  Buf += OutputOffset;
+  uint8_t *Orig = Buf;
+  (void)Orig; // Only read inside DEBUG() below.
+
+  const uint8_t *SecStart = File->CodeSection->Content.data();
+  const uint8_t *FuncStart = SecStart + getInputSectionOffset();
+  const uint8_t *End = FuncStart + Function->Size;
+  // Skip the input function's LEB-encoded size header. Its length has to be
+  // read from the input bytes; the output buffer has not been written yet.
+  uint32_t Count;
+  decodeULEB128(FuncStart, &Count);
+  FuncStart += Count;
+
+  DEBUG(dbgs() << "write func: " << getName() << "\n");
+  // Emit the new size header for the compressed body.
+  Buf += encodeULEB128(CompressedFuncSize, Buf);
+  const uint8_t *LastRelocEnd = FuncStart;
+  for (const WasmRelocation &Rel : Relocations) {
+    // Copy the bytes between the previous relocation target and this one
+    // unchanged, then write this target's value without padding.
+    unsigned ChunkSize = (SecStart + Rel.Offset) - LastRelocEnd;
+    DEBUG(dbgs() << " write chunk: " << ChunkSize << "\n");
+    memcpy(Buf, LastRelocEnd, ChunkSize);
+    Buf += ChunkSize;
+    Buf += writeCompressedReloc(Buf, Rel, File->calcNewValue(Rel));
+    // Resume reading after the target's full width in the input.
+    LastRelocEnd = SecStart + Rel.Offset + getRelocWidthPadded(Rel);
+  }
+
+  // Copy whatever follows the last relocation target.
+  unsigned ChunkSize = End - LastRelocEnd;
+  DEBUG(dbgs() << " write final chunk: " << ChunkSize << "\n");
+  memcpy(Buf, LastRelocEnd, ChunkSize);
+  DEBUG(dbgs() << " total: " << (Buf + ChunkSize - Orig) << "\n");
+}
Index: lld/trunk/wasm/Options.td
===================================================================
--- lld/trunk/wasm/Options.td
+++ lld/trunk/wasm/Options.td
@@ -65,6 +65,8 @@
 def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"">, HelpText<"Path to file to write output">;
+def O: JoinedOrSeparate<["-"], "O">, HelpText<"Optimize output file size">;
+
 defm print_gc_sections: B<"print-gc-sections", "List removed unused sections", "Do not list removed unused sections">;
Index: lld/trunk/wasm/OutputSections.cpp
===================================================================
--- lld/trunk/wasm/OutputSections.cpp
+++ lld/trunk/wasm/OutputSections.cpp
@@ -85,8 +85,9 @@
   OS.flush();
   BodySize = CodeSectionHeader.size();
-  for (InputChunk *Func : Functions) {
+  for (InputFunction *Func : Functions) {
     Func->OutputOffset = BodySize;
+    Func->calculateSize();
     BodySize += Func->getSize();
   }