Index: lld/test/wasm/data-segments.ll =================================================================== --- lld/test/wasm/data-segments.ll +++ lld/test/wasm/data-segments.ll @@ -30,12 +30,12 @@ ; Also test in combination with PIC/pie ; RUN: wasm-ld --experimental-pic -pie -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.bulk-mem.pic.o -o %t.pic.wasm ; RUN: obj2yaml %t.pic.wasm | FileCheck %s --check-prefixes PASSIVE-PIC,PASSIVE32-PIC -; RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_init_memory --no-show-raw-insn --no-leading-addr %t.pic.wasm | FileCheck %s --check-prefixes DIS,PIC-DIS -DPTR=i32 +; RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_apply_data_relocs_tail,__wasm_init_memory --no-show-raw-insn --no-leading-addr %t.pic.wasm | FileCheck %s --check-prefixes DIS,PIC-DIS -DPTR=i32 ; Also test in combination with PIC/pie + wasm64 ; RUN: wasm-ld -mwasm64 --experimental-pic -pie -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.bulk-mem.pic-mem64.o -o %t.pic-mem64.wasm ; RUN: obj2yaml %t.pic-mem64.wasm | FileCheck %s --check-prefixes PASSIVE-PIC,PASSIVE64-PIC -; RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_init_memory --no-show-raw-insn --no-leading-addr %t.pic-mem64.wasm | FileCheck %s --check-prefixes DIS,PIC-DIS -DPTR=i64 +; RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_apply_data_relocs_tail,__wasm_init_memory --no-show-raw-insn --no-leading-addr %t.pic-mem64.wasm | FileCheck %s --check-prefixes DIS,PIC-DIS -DPTR=i64 @a = hidden global [6 x i8] c"hello\00", align 1 @b = hidden global [8 x i8] c"goodbye\00", align 1 @@ -113,7 +113,7 @@ ; PASSIVE-NEXT: Name: __wasm_init_memory ; PASSIVE-PIC: - Type: START -; PASSIVE-PIC-NEXT: StartFunction: 3 +; PASSIVE-PIC-NEXT: StartFunction: 4 ; PASSIVE-PIC-NEXT: - Type: DATACOUNT ; PASSIVE-PIC-NEXT: 
Count: 3 ; PASSIVE-PIC-NEXT: - Type: CODE @@ -128,6 +128,9 @@ ; PASSIVE-PIC-NEXT: Locals: [] ; PASSIVE-PIC-NEXT: Body: 0B ; PASSIVE-PIC-NEXT: - Index: 3 +; PASSIVE-PIC-NEXT: Locals: [] +; PASSIVE-PIC-NEXT: Body: 0B +; PASSIVE-PIC-NEXT: - Index: 4 ; PASSIVE-PIC-NEXT: Locals: ; PASSIVE32-PIC-NEXT: - Type: I32 ; PASSIVE64-PIC-NEXT: - Type: I64 @@ -154,6 +157,8 @@ ; PASSIVE-PIC-NEXT: - Index: 2 ; PASSIVE-PIC-NEXT: Name: __wasm_apply_data_relocs ; PASSIVE-PIC-NEXT: - Index: 3 +; PASSIVE-PIC-NEXT: Name: __wasm_apply_data_relocs_tail +; PASSIVE-PIC-NEXT: - Index: 4 ; PASSIVE-PIC-NEXT: Name: __wasm_init_memory ; no data relocations. @@ -161,10 +166,14 @@ ; DIS-EMPTY: ; DIS-NEXT: end -; In PIC mode __wasm_apply_data_relocs is export seperatly to __wasm_call_ctors +; In PIC mode __wasm_apply_data_relocs and __wasm_apply_data_relocs_tail +; are exported separately from __wasm_call_ctors ; PIC-DIS: <__wasm_apply_data_relocs>: ; PIC-DIS-EMPTY: +; PIC-DIS: <__wasm_apply_data_relocs_tail>: +; PIC-DIS-EMPTY: + ; DIS-LABEL: <__wasm_init_memory>: ; PIC-DIS: .local [[PTR]] Index: lld/test/wasm/pie.ll =================================================================== --- lld/test/wasm/pie.ll +++ lld/test/wasm/pie.ll @@ -1,7 +1,7 @@ ; RUN: llc -relocation-model=pic -mattr=+mutable-globals -filetype=obj %s -o %t.o ; RUN: wasm-ld --no-gc-sections --experimental-pic -pie -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s -; RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM +; RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs,__wasm_apply_data_relocs_tail --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DISASSEM target triple = "wasm32-unknown-emscripten" @@ -70,7 +70,7 @@ ; CHECK-NEXT: GlobalMutable: false ; CHECK: - Type: START -; CHECK-NEXT: StartFunction: 3 +; CHECK-NEXT: StartFunction: 4 ; CHECK: - Type: 
CUSTOM ; CHECK-NEXT: Name: name @@ -82,12 +82,14 @@ ; CHECK-NEXT: - Index: 2 ; CHECK-NEXT: Name: __wasm_apply_data_relocs ; CHECK-NEXT: - Index: 3 -; CHECK-NEXT: Name: __wasm_apply_global_relocs +; CHECK-NEXT: Name: __wasm_apply_data_relocs_tail ; CHECK-NEXT: - Index: 4 -; CHECK-NEXT: Name: foo +; CHECK-NEXT: Name: __wasm_apply_global_relocs ; CHECK-NEXT: - Index: 5 -; CHECK-NEXT: Name: get_data_address +; CHECK-NEXT: Name: foo ; CHECK-NEXT: - Index: 6 +; CHECK-NEXT: Name: get_data_address +; CHECK-NEXT: - Index: 7 ; CHECK-NEXT: Name: _start ; CHECK-NEXT: GlobalNames: @@ -98,6 +100,9 @@ ; DISASSEM-LABEL: <__wasm_apply_data_relocs>: ; DISASSEM: end +; DISASSEM-LABEL: <__wasm_apply_data_relocs_tail>: +; DISASSEM: end + ; Run the same test with extended-const support. When this is available ; we don't need __wasm_apply_global_relocs and instead rely on the add ; instruction in the InitExpr. We also, therefore, do not need these globals @@ -141,9 +146,11 @@ ; EXTENDED-CONST-NEXT: Name: __wasm_call_ctors ; EXTENDED-CONST-NEXT: - Index: 2 ; EXTENDED-CONST-NEXT: Name: __wasm_apply_data_relocs +; EXTENDED-CONST-NEXT: - Index: 3 +; EXTENDED-CONST-NEXT: Name: __wasm_apply_data_relocs_tail -; Run the same test with threading support. In this mode -; we expect __wasm_init_memory and __wasm_apply_data_relocs +; Run the same test with threading support. In this mode we expect +; __wasm_init_memory, __wasm_apply_data_relocs and __wasm_apply_data_relocs_tail ; to be generated along with __wasm_start as the start ; function. 
@@ -153,12 +160,12 @@ ; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefix DISASSEM-SHMEM ; SHMEM: - Type: START -; SHMEM-NEXT: StartFunction: 6 +; SHMEM-NEXT: StartFunction: 7 ; DISASSEM-SHMEM-LABEL: <__wasm_start>: ; DISASSEM-SHMEM-EMPTY: +; DISASSEM-SHMEM-NEXT: call 6 ; DISASSEM-SHMEM-NEXT: call 5 -; DISASSEM-SHMEM-NEXT: call 4 ; DISASSEM-SHMEM-NEXT: end ; SHMEM: FunctionNames: @@ -171,14 +178,16 @@ ; SHMEM-NEXT: - Index: 3 ; SHMEM-NEXT: Name: __wasm_apply_data_relocs ; SHMEM-NEXT: - Index: 4 -; SHMEM-NEXT: Name: __wasm_init_memory +; SHMEM-NEXT: Name: __wasm_apply_data_relocs_tail ; SHMEM-NEXT: - Index: 5 -; SHMEM-NEXT: Name: __wasm_apply_global_relocs +; SHMEM-NEXT: Name: __wasm_init_memory ; SHMEM-NEXT: - Index: 6 -; SHMEM-NEXT: Name: __wasm_start +; SHMEM-NEXT: Name: __wasm_apply_global_relocs ; SHMEM-NEXT: - Index: 7 -; SHMEM-NEXT: Name: foo +; SHMEM-NEXT: Name: __wasm_start ; SHMEM-NEXT: - Index: 8 -; SHMEM-NEXT: Name: get_data_address +; SHMEM-NEXT: Name: foo ; SHMEM-NEXT: - Index: 9 +; SHMEM-NEXT: Name: get_data_address +; SHMEM-NEXT: - Index: 10 ; SHMEM-NEXT: Name: _start
Index: lld/wasm/ApplyDataRelocsSplitter.h
===================================================================
--- /dev/null
+++ lld/wasm/ApplyDataRelocsSplitter.h
@@ -0,0 +1,52 @@
+#ifndef LLD_WASM_APPLY_DATA_RELOCS_SPLITTER_H
+#define LLD_WASM_APPLY_DATA_RELOCS_SPLITTER_H
+
+#include <cstddef>
+#include <string>
+#include <utility>
+
+namespace lld::wasm {
+
+// Splits an oversized function body (used for __wasm_apply_data_relocs) into
+// two bodies so that the first one stays below config->maxFunctionSize.
+//
+// Only a reference to the body is stored, so the string handed to the
+// constructor must outlive the splitter.
+class ApplyDataRelocsSplitter {
+public:
+  // {body of the first function, body of the tail function}.
+  using SplittedFunctions = std::pair<std::string, std::string>;
+
+  explicit ApplyDataRelocsSplitter(const std::string &functionBody);
+  // Binding a temporary would leave the stored reference dangling.
+  ApplyDataRelocsSplitter(std::string &&) = delete;
+
+  // Splits the function up into 2 functions: the instructions that exceed the
+  // function size limit are cut off and moved into the second function, and
+  // the first function calls the second one at the end.
+  // The last instruction left in the first function is
+  // WASM_OPCODE_I32_STORE/WASM_OPCODE_I64_STORE (+ align & offset).
+  // If the function doesn't exceed the limit, or no store instruction is
+  // found to cut after, returns the given function and an empty one.
+  SplittedFunctions split();
+
+private:
+  // Returns the offset just past the last store instruction that fits below
+  // the size limit, or std::string::npos if there is none.
+  size_t findPositionToCutInstructionsFrom(unsigned instructionOpcode) const;
+  // Fills functionBody with the first `pos` bytes followed by a call to
+  // __wasm_apply_data_relocs_tail and END.
+  void appendApplyDataRelocsTailCall(std::string &functionBody,
+                                     size_t pos) const;
+  // Appends an empty locals declaration and the bytes from `pos` onwards.
+  void appendCutOffInstructions(std::string &functionBody, size_t pos) const;
+
+  // Returns a body with no locals and a single END.
+  std::string createEmptyFunctionBody() const;
+
+  const std::string &functionToSplit;
+};
+
+} // namespace lld::wasm
+
+#endif
Index: lld/wasm/ApplyDataRelocsSplitter.cpp
===================================================================
--- /dev/null
+++ lld/wasm/ApplyDataRelocsSplitter.cpp
@@ -0,0 +1,93 @@
+#include "ApplyDataRelocsSplitter.h"
+#include "Config.h"
+#include "Symbols.h"
+#include "WriterUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace lld::wasm {
+
+ApplyDataRelocsSplitter::ApplyDataRelocsSplitter(
+    const std::string &functionBody)
+    : functionToSplit(functionBody) {}
+
+ApplyDataRelocsSplitter::SplittedFunctions ApplyDataRelocsSplitter::split() {
+  // Don't split the function if it doesn't exceed the limit.
+  if (functionToSplit.size() < config->maxFunctionSize)
+    return {functionToSplit, createEmptyFunctionBody()};
+
+  const bool is64 = config->is64.value_or(false);
+  const unsigned storeOpcode = is64 ? llvm::wasm::WASM_OPCODE_I64_STORE
+                                    : llvm::wasm::WASM_OPCODE_I32_STORE;
+
+  const size_t pos = findPositionToCutInstructionsFrom(storeOpcode);
+  // Don't split the function if we didn't find an appropriate instruction.
+  if (pos == std::string::npos)
+    return {functionToSplit, createEmptyFunctionBody()};
+
+  SplittedFunctions functions;
+  appendApplyDataRelocsTailCall(functions.first, pos);
+  appendCutOffInstructions(functions.second, pos);
+  return functions;
+}
+
+size_t ApplyDataRelocsSplitter::findPositionToCutInstructionsFrom(
+    unsigned instructionOpcode) const {
+  // Bytes kept free below the limit for the CALL, its function index and the
+  // END that get appended to the first function.
+  constexpr size_t reservedBytes = 10;
+  // Opcode byte + align + offset.
+  // NOTE(review): assumes both immediates are encoded in a single byte and
+  // that the byte found is really an opcode rather than part of another
+  // instruction's immediate - confirm against the code that emits the body.
+  constexpr size_t storeInstructionSize = 3;
+
+  if (config->maxFunctionSize <= reservedBytes)
+    return std::string::npos;
+
+  const size_t opcodePos =
+      functionToSplit.rfind(static_cast<char>(instructionOpcode),
+                            config->maxFunctionSize - reservedBytes);
+  // Report "not found" as is: npos + storeInstructionSize would wrap around
+  // to a small offset and hide the failure from split().
+  if (opcodePos == std::string::npos)
+    return std::string::npos;
+
+  return opcodePos + storeInstructionSize;
+}
+
+void ApplyDataRelocsSplitter::appendApplyDataRelocsTailCall(
+    std::string &functionBody, size_t pos) const {
+  // Copy the kept prefix before attaching the stream, so everything written
+  // through the stream lands after it.
+  functionBody.assign(functionToSplit, 0, pos);
+
+  llvm::raw_string_ostream os(functionBody);
+  writeU8(os, llvm::wasm::WASM_OPCODE_CALL, "CALL");
+  writeUleb128(os, WasmSym::applyDataRelocsTail->getFunctionIndex(),
+               "function index");
+  writeU8(os, llvm::wasm::WASM_OPCODE_END, "END");
+}
+
+void ApplyDataRelocsSplitter::appendCutOffInstructions(
+    std::string &functionBody, size_t pos) const {
+  // Write the header and the tail through the same stream so their order
+  // doesn't depend on when the stream flushes into functionBody.
+  llvm::raw_string_ostream os(functionBody);
+  writeUleb128(os, 0, "num locals");
+  os << llvm::StringRef(functionToSplit).substr(pos);
+}
+
+std::string ApplyDataRelocsSplitter::createEmptyFunctionBody() const {
+  std::string bodyContent;
+  {
+    // Scope the stream so it is flushed into bodyContent before returning.
+    llvm::raw_string_ostream os(bodyContent);
+    writeUleb128(os, 0, "num locals");
+    writeU8(os, llvm::wasm::WASM_OPCODE_END, "END");
+  }
+  return bodyContent;
+}
+
+} // namespace lld::wasm
Index: lld/wasm/CMakeLists.txt
=================================================================== --- lld/wasm/CMakeLists.txt +++ lld/wasm/CMakeLists.txt @@ -3,6 +3,7 @@ add_public_tablegen_target(WasmOptionsTableGen) add_lld_library(lldWasm + ApplyDataRelocsSplitter.cpp Driver.cpp InputChunks.cpp InputFiles.cpp Index: lld/wasm/Config.h =================================================================== --- lld/wasm/Config.h +++ lld/wasm/Config.h @@ -67,6 +67,9 @@ bool ltoDebugPassManager; UnresolvedPolicy unresolvedSymbols; + // https://github.com/v8/v8/blob/master/src/wasm/wasm-limits.h#L47 + uint64_t maxFunctionSize = 7'654'321; + llvm::StringRef entry; llvm::StringRef mapFile; llvm::StringRef outputFile; Index: lld/wasm/Driver.cpp =================================================================== --- lld/wasm/Driver.cpp +++ lld/wasm/Driver.cpp @@ -763,6 +763,11 @@ "__wasm_apply_data_relocs", WASM_SYMBOL_VISIBILITY_DEFAULT | WASM_SYMBOL_EXPORTED, make(nullSignature, "__wasm_apply_data_relocs")); + + WasmSym::applyDataRelocsTail = symtab->addSyntheticFunction( + "__wasm_apply_data_relocs_tail", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(nullSignature, + "__wasm_apply_data_relocs_tail")); } } Index: lld/wasm/Symbols.h =================================================================== --- lld/wasm/Symbols.h +++ lld/wasm/Symbols.h @@ -573,6 +573,15 @@ // Function that applies relocations to data segment post-instantiation. static DefinedFunction *applyDataRelocs; + // __wasm_apply_data_relocs_tail + // __wasm_apply_data_relocs can grow drastically exceeding the function + // size limit. In that case, we split it up into 2 functions, so that + // the extra number of instructions go to applyDataRelocsTail. + // This function is called only from applyDataRelocs and only if + // the latter exceeds the function size limit. The rest of the time this + // function is empty. 
+ static DefinedFunction *applyDataRelocsTail; + // __wasm_apply_global_relocs // Function that applies relocations to wasm globals post-instantiation. // Unlike __wasm_apply_data_relocs this needs to run on every thread. Index: lld/wasm/Symbols.cpp =================================================================== --- lld/wasm/Symbols.cpp +++ lld/wasm/Symbols.cpp @@ -76,6 +76,7 @@ DefinedFunction *WasmSym::callDtors; DefinedFunction *WasmSym::initMemory; DefinedFunction *WasmSym::applyDataRelocs; +DefinedFunction *WasmSym::applyDataRelocsTail; DefinedFunction *WasmSym::applyGlobalRelocs; DefinedFunction *WasmSym::applyGlobalTLSRelocs; DefinedFunction *WasmSym::initTLS; Index: lld/wasm/Writer.cpp =================================================================== --- lld/wasm/Writer.cpp +++ lld/wasm/Writer.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Writer.h" +#include "ApplyDataRelocsSplitter.h" #include "Config.h" #include "InputChunks.h" #include "InputElement.h" @@ -1338,7 +1339,10 @@ writeU8(os, WASM_OPCODE_END, "END"); } - createFunction(WasmSym::applyDataRelocs, bodyContent); + ApplyDataRelocsSplitter splitter(bodyContent); + auto &&[firstFunctionBody, secondFunctionBody] = splitter.split(); + createFunction(WasmSym::applyDataRelocs, firstFunctionBody); + createFunction(WasmSym::applyDataRelocsTail, secondFunctionBody); } // Similar to createApplyDataRelocationsFunction but generates relocation code