diff --git a/lld/test/wasm/pie.ll b/lld/test/wasm/pie.ll --- a/lld/test/wasm/pie.ll +++ b/lld/test/wasm/pie.ll @@ -92,15 +92,64 @@ ; DISASSEM-NEXT: call 2 ; DISASSEM-NEXT: end +; Run the same test with extended-const support. When this is available +; we don't need __wasm_apply_global_relocs and instead rely on the add +; instruction in the InitExpr. We also, therefore, do not need these globals +; to be mutable. + +; RUN: llc -relocation-model=pic -mattr=+extended-const,+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.extended.o +; RUN: wasm-ld --no-gc-sections --allow-undefined --experimental-pic -pie -o %t.extended.wasm %t.extended.o +; RUN: obj2yaml %t.extended.wasm | FileCheck %s --check-prefix=EXTENDED-CONST + +; EXTENDED-CONST-NOT: __wasm_apply_global_relocs + +; EXTENDED-CONST: - Type: GLOBAL +; EXTENDED-CONST-NEXT: Globals: +; EXTENDED-CONST-NEXT: - Index: 4 +; EXTENDED-CONST-NEXT: Type: I32 +; EXTENDED-CONST-NEXT: Mutable: false +; EXTENDED-CONST-NEXT: InitExpr: +; EXTENDED-CONST-NEXT: Opcode: GLOBAL_GET +; EXTENDED-CONST-NEXT: Index: 1 +; EXTENDED-CONST-NEXT: - Index: 5 +; EXTENDED-CONST-NEXT: Type: I32 +; EXTENDED-CONST-NEXT: Mutable: false +; EXTENDED-CONST-NEXT: InitExpr: +; EXTENDED-CONST-NEXT: Extended: true +; EXTENDED-CONST-NEXT: Body: 230141046A0B +; EXTENDED-CONST-NEXT: - Index: 6 +; EXTENDED-CONST-NEXT: Type: I32 +; EXTENDED-CONST-NEXT: Mutable: false +; EXTENDED-CONST-NEXT: InitExpr: +; EXTENDED-CONST-NEXT: Extended: true +; EXTENDED-CONST-NEXT: Body: 2301410C6A0B + +; EXTENDED-CONST: - Type: START +; EXTENDED-CONST-NEXT: StartFunction: 2 + +; EXTENDED-CONST: FunctionNames: +; EXTENDED-CONST-NEXT: - Index: 0 +; EXTENDED-CONST-NEXT: Name: external_func +; EXTENDED-CONST-NEXT: - Index: 1 +; EXTENDED-CONST-NEXT: Name: __wasm_call_ctors +; EXTENDED-CONST-NEXT: - Index: 2 +; EXTENDED-CONST-NEXT: Name: __wasm_apply_data_relocs + ; Run the same test with threading support. 
In this mode ; we expect __wasm_init_memory and __wasm_apply_data_relocs ; to be generated along with __wasm_start as the start ; function. -; RUN: llc -relocation-model=pic -mattr=+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.shmem.o -; RUN: wasm-ld --no-gc-sections --shared-memory --allow-undefined --experimental-pic -pie -o %t.shmem.wasm %t.shmem.o -; RUN: obj2yaml %t.shmem.wasm | FileCheck %s --check-prefix=SHMEM -; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.shmem.wasm | FileCheck %s --check-prefixes DISASSEM-SHMEM +; DISASSEM-EXTENDED-CONST: <__wasm_start>: +; DISASSEM-EXTENDED-CONST-EMPTY: +; DISASSEM-EXTENDED-CONST-NEXT: call 5 +; DISASSEM-EXTENDED-CONST-NEXT: call 3 +; DISASSEM-EXTENDED-CONST-NEXT: end + +; RUN: llc -relocation-model=pic -mattr=+mutable-globals,+atomics,+bulk-memory -filetype=obj %s -o %t.ext.o +; RUN: wasm-ld --no-gc-sections --shared-memory --allow-undefined --experimental-pic -pie -o %t.ext.wasm %t.ext.o +; RUN: obj2yaml %t.ext.wasm | FileCheck %s --check-prefix=SHMEM +; RUN: llvm-objdump --disassemble-symbols=__wasm_start --no-show-raw-insn --no-leading-addr %t.ext.wasm | FileCheck %s --check-prefix DISASSEM-SHMEM ; SHMEM: - Type: START ; SHMEM-NEXT: StartFunction: 6 @@ -132,4 +181,3 @@ ; SHMEM-NEXT: Name: get_data_address ; SHMEM-NEXT: - Index: 9 ; SHMEM-NEXT: Name: _start - diff --git a/lld/test/wasm/tls-non-shared-memory.s b/lld/test/wasm/tls-non-shared-memory.s --- a/lld/test/wasm/tls-non-shared-memory.s +++ b/lld/test/wasm/tls-non-shared-memory.s @@ -51,6 +51,9 @@ # RUN: wasm-ld --experimental-pic --no-gc-sections --no-entry -pie -o %t-pie.wasm %t.o # RUN: obj2yaml %t-pie.wasm | FileCheck %s --check-prefixes=PIE,PIC +# RUN: wasm-ld --experimental-pic --features=atomics,bulk-memory,extended-const --no-gc-sections --no-entry -pie -o %t-extended-const.wasm %t.o +# RUN: obj2yaml %t-extended-const.wasm | FileCheck %s --check-prefixes=EXT-CONST + # CHECK: - Type: GLOBAL # 
__stack_pointer # CHECK-NEXT: Globals: @@ -136,3 +139,24 @@ # PIC-NEXT: Index: {{\d*}} # PIC-NEXT: Content: 2B0000002A000000 # PIC-NEXT: - Type: CUSTOM + +# Unless we have extended-const, in which case the merging is not needed. +# There the first segment is placed directly at `__memory_base` and the second +# one is offset from `__memory_base` using `i32.add` and a constant. + +# EXT-CONST: - Type: DATA +# EXT-CONST-NEXT: Segments: +# EXT-CONST-NEXT: - SectionOffset: 6 +# EXT-CONST-NEXT: InitFlags: 0 +# EXT-CONST-NEXT: Offset: +# EXT-CONST-NEXT: Opcode: GLOBAL_GET +# EXT-CONST-NEXT: Index: 1 +# EXT-CONST-NEXT: Content: 2B000000 +# EXT-CONST-NEXT: - SectionOffset: 18 +# EXT-CONST-NEXT: InitFlags: 0 +# EXT-CONST-NEXT: Offset: +# EXT-CONST-NEXT: Extended: true +# This instruction sequence decodes to: +# (global.get[0x23] 0x1 i32.const[0x41] 0x04 i32.add[0x6A] end[0x0b]) +# EXT-CONST-NEXT: Body: 230141046A0B +# EXT-CONST-NEXT: Content: 2A000000 diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -35,6 +35,7 @@ bool exportAll; bool exportDynamic; bool exportTable; + bool extendedConst; bool growableTable; bool gcSections; bool importMemory; diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -143,12 +143,14 @@ }); #endif - assert((config->sharedMemory || !config->isPic || activeCount <= 1) && + assert((config->sharedMemory || !config->isPic || config->extendedConst || + activeCount <= 1) && "output segments should have been combined by now"); writeUleb128(os, segmentCount, "data segment count"); os.flush(); bodySize = dataSectionHeader.size(); + bool is64 = config->is64.getValueOr(false); for (OutputSegment *segment : segments) { if (!segment->requiredInBinary()) @@ -158,15 +160,27 @@ if (segment->initFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX) writeUleb128(os, 0, "memory index"); if ((segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE) 
== 0) { - WasmInitExpr initExpr; - initExpr.Extended = false; - if (config->isPic) { - initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET; - initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex(); + if (config->isPic && config->extendedConst) { + writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get"); + writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), + "literal (global index)"); + if (segment->startVA) { + writePtrConst(os, segment->startVA, is64, "offset"); + writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); + } + writeU8(os, WASM_OPCODE_END, "opcode:end"); } else { - initExpr = intConst(segment->startVA, config->is64.getValueOr(false)); + WasmInitExpr initExpr; + initExpr.Extended = false; + if (config->isPic) { + assert(segment->startVA == 0); + initExpr.Inst.Opcode = WASM_OPCODE_GLOBAL_GET; + initExpr.Inst.Value.Global = WasmSym::memoryBase->getGlobalIndex(); + } else { + initExpr = intConst(segment->startVA, is64); + } + writeInitExpr(os, initExpr); } - writeInitExpr(os, initExpr); } writeUleb128(os, segment->size, "segment size"); os.flush(); diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h --- a/lld/wasm/SyntheticSections.h +++ b/lld/wasm/SyntheticSections.h @@ -288,6 +288,8 @@ // transform a `global.get` to an `i32.const`. void addInternalGOTEntry(Symbol *sym); bool needsRelocations() { + if (config->extendedConst) + return false; return llvm::find_if(internalGotSymbols, [=](Symbol *sym) { return !sym->isTLS(); }) != internalGotSymbols.end(); diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -408,6 +408,7 @@ } void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { + assert(!config->extendedConst); bool is64 = config->is64.getValueOr(false); unsigned opcode_ptr_const = is64 ? 
WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST; @@ -463,10 +464,10 @@ for (const Symbol *sym : internalGotSymbols) { bool mutable_ = false; if (!sym->isStub) { - // In the case of dynamic linking, these global must to be mutable since - // they get updated to the correct runtime value during - // `__wasm_apply_global_relocs`. - if (config->isPic && !sym->isTLS()) + // In the case of dynamic linking, unless we have 'extended-const' + // available, these globals must be mutable since they get updated to + // the correct runtime value during `__wasm_apply_global_relocs`. + if (!config->extendedConst && config->isPic && !sym->isTLS()) mutable_ = true; // With multi-theadeding any TLS globals must be mutable since they get // set during `__wasm_apply_global_tls_relocs` @@ -474,17 +475,33 @@ mutable_ = true; } WasmGlobalType type{itype, mutable_}; - WasmInitExpr initExpr; - if (auto *d = dyn_cast(sym)) - initExpr = intConst(d->getVA(), is64); - else if (auto *f = dyn_cast(sym)) - initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64); - else { - assert(isa(sym)); - initExpr = intConst(0, is64); - } writeGlobalType(os, type); - writeInitExpr(os, initExpr); + + if (config->extendedConst && config->isPic && !sym->isTLS() && + isa(sym)) { + // We can use an extended init expression to add a constant + // offset of __memory_base. + auto *d = cast(sym); + writeU8(os, WASM_OPCODE_GLOBAL_GET, "global get"); + writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), + "literal (global index)"); + if (d->getVA()) { + writePtrConst(os, d->getVA(), is64, "offset"); + writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); + } + writeU8(os, WASM_OPCODE_END, "opcode:end"); + } else { + WasmInitExpr initExpr; + if (auto *d = dyn_cast(sym)) + initExpr = intConst(d->getVA(), is64); + else if (auto *f = dyn_cast(sym)) + initExpr = intConst(f->isStub ? 
0 : f->getTableIndex(), is64); + else { + assert(isa(sym)); + initExpr = intConst(0, is64); + } + writeInitExpr(os, initExpr); + } } for (const DefinedData *sym : dataAddressGlobals) { WasmGlobalType type{itype, false}; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -450,7 +450,7 @@ auto &explicitFeatures = config->features.getValue(); allowed.insert(explicitFeatures.begin(), explicitFeatures.end()); if (!config->checkFeatures) - return; + goto done; } // Find the sets of used, required, and disallowed features @@ -486,7 +486,7 @@ allowed.insert(std::string(key)); if (!config->checkFeatures) - return; + goto done; if (config->sharedMemory) { if (disallowed.count("shared-mem")) @@ -537,12 +537,19 @@ } } +done: // Normally we don't include bss segments in the binary. In particular if // memory is not being imported then we can assume its zero initialized. // In the case the memory is imported, we and we can use the memory.fill // instrction than we can also avoid inluding the segments. if (config->importMemory && !allowed.count("bulk-memory")) config->emitBssSegments = true; + + if (allowed.count("extended-const")) + config->extendedConst = true; + + for (auto &feature : allowed) + log("Allowed feature: " + feature); } void Writer::checkImportExportTargetFeatures() { @@ -920,9 +927,9 @@ // With PIC code we currently only support a single active data segment since // we only have a single __memory_base to use as our base address. This pass // combines all data segments into a single .data segment. 
- // This restructions can be relaxed once we have extended constant - // expressions available: - // https://github.com/WebAssembly/extended-const + // This restriction does not apply when the extended const extension is + // available: https://github.com/WebAssembly/extended-const + assert(!config->extendedConst); assert(config->isPic && !config->sharedMemory); if (segments.size() <= 1) return; @@ -1553,7 +1560,14 @@ } } - if (config->isPic && !config->sharedMemory) { + log("-- populateTargetFeatures"); + populateTargetFeatures(); + + // When outputting PIC code each segment lives at a fixed offset from the + // `__memory_base` import. Unless we support extended const expressions we + // can't do addition inside the constant expression, so we must combine the + // segments into a single one that can live at `__memory_base`. + if (config->isPic && !config->extendedConst && !config->sharedMemory) { // In shared memory mode all data segments are passive and initialized // via __wasm_init_memory. log("-- combineOutputSegments"); @@ -1570,8 +1584,6 @@ scanRelocations(); log("-- finalizeIndirectFunctionTable"); finalizeIndirectFunctionTable(); - log("-- populateTargetFeatures"); - populateTargetFeatures(); log("-- createSyntheticInitFunctions"); createSyntheticInitFunctions(); log("-- assignIndexes");