diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -187,6 +187,13 @@ ; DIS-NEXT: i32.const 0 ; DIS-NEXT: i32.const 20 ; DIS-NEXT: memory.init 1, 0 +; NOPIC-DIS-NEXT: [[PTR]].const 1060 +; PIC-DIS-NEXT: [[PTR]].const 36 +; PIC-DIS-NEXT: global.get 1 +; PIC-DIS-NEXT: [[PTR]].add +; DIS-NEXT: i32.const 0 +; DIS-NEXT: i32.const 10000 +; DIS-NEXT: memory.fill 0 ; NOPIC-DIS-NEXT: [[PTR]].const 11060 ; PIC-DIS-NEXT: local.get 0 diff --git a/lld/test/wasm/shared-memory-bss.s b/lld/test/wasm/shared-memory-bss.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/shared-memory-bss.s @@ -0,0 +1,72 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --experimental-pic -shared --shared-memory -o %t.so %t.o +# RUN: llvm-objdump -d --no-show-raw-insn --no-leading-addr %t.so | FileCheck %s +# RUN: obj2yaml %t.so | FileCheck %s --check-prefix=YAML + +.section .bss.foo,"",@ +.globl foo +.p2align 2 +foo: + .int32 0 + .size foo, 4 + +.section .data.bar,"",@ +.globl bar +.p2align 2 +bar: + .int32 42 + .size bar, 4 + +.section .custom_section.target_features,"",@ + .int8 2 + .int8 43 + .int8 7 + .ascii "atomics" + .int8 43 + .int8 11 + .ascii "bulk-memory" + +# Verify that there is only a single data segment and no bss +# in the binary: + +# YAML: - Type: DATA{{$}} +# YAML-NEXT: Segments: +# YAML-NEXT: - SectionOffset: 3 +# YAML-NEXT: InitFlags: 1 +# YAML-NEXT: Content: 2A000000 +# YAML-NEXT: - Type: CUSTOM + +# CHECK: <__wasm_init_memory>: +# CHECK-NEXT: .local i32 +# CHECK-NEXT: global.get 0 +# CHECK-NEXT: i32.const 8 +# CHECK-NEXT: i32.add +# CHECK-NEXT: local.set 0 +# CHECK-NEXT: block +# CHECK-NEXT: block +# CHECK-NEXT: block +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: i32.const 1 +# CHECK-NEXT: i32.atomic.rmw.cmpxchg 0 +# CHECK-NEXT: br_table {0, 1, 2} # 1: down to label1 +# CHECK-NEXT: # 2: down to label0 +# 
CHECK-NEXT: end + +# Regular data gets initialized with memory.init + +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: global.get 0 +# CHECK-NEXT: i32.add +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: i32.const 4 +# CHECK-NEXT: memory.init 0, 0 + +# BSS gets initialized with memory.fill + +# CHECK-NEXT: i32.const 4 +# CHECK-NEXT: global.get 0 +# CHECK-NEXT: i32.add +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: i32.const 4 +# CHECK-NEXT: memory.fill 0 diff --git a/lld/test/wasm/tls.s b/lld/test/wasm/tls.s --- a/lld/test/wasm/tls.s +++ b/lld/test/wasm/tls.s @@ -87,7 +87,7 @@ # CHECK-NEXT: Mutable: true # CHECK-NEXT: InitExpr: # CHECK-NEXT: Opcode: I32_CONST -# CHECK-NEXT: Value: 66576 +# CHECK-NEXT: Value: 66592 # __tls_base # CHECK-NEXT: - Index: 1 diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -91,6 +91,10 @@ // for shared libraries (since they always added to a dynamic offset at // runtime). uint32_t tableBase = 0; + + // Will be set to true if bss data segments should be emitted. In most cases + // this is not necessary. + bool emitBssSegments = false; }; // The only instance of Configuration struct. 
diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -133,10 +133,9 @@ void DataSection::finalizeContents() { raw_string_ostream os(dataSectionHeader); - unsigned segmentCount = - std::count_if(segments.begin(), segments.end(), - [](OutputSegment *segment) { return !segment->isBss; }); - + unsigned segmentCount = std::count_if( + segments.begin(), segments.end(), + [](OutputSegment *segment) { return segment->requiredInBinary(); }); #ifndef NDEBUG unsigned activeCount = std::count_if( segments.begin(), segments.end(), [](OutputSegment *segment) { @@ -152,7 +151,7 @@ bodySize = dataSectionHeader.size(); for (OutputSegment *segment : segments) { - if (segment->isBss) + if (!segment->requiredInBinary()) continue; raw_string_ostream os(segment->header); writeUleb128(os, segment->initFlags, "init flags"); @@ -199,7 +198,7 @@ memcpy(buf, dataSectionHeader.data(), dataSectionHeader.size()); for (const OutputSegment *segment : segments) { - if (segment->isBss) + if (!segment->requiredInBinary()) continue; // Write data segment header uint8_t *segStart = buf + segment->sectionOffset; @@ -227,7 +226,7 @@ bool DataSection::isNeeded() const { for (const OutputSegment *seg : segments) - if (!seg->isBss) + if (seg->requiredInBinary()) return true; return false; } diff --git a/lld/wasm/OutputSegment.h b/lld/wasm/OutputSegment.h --- a/lld/wasm/OutputSegment.h +++ b/lld/wasm/OutputSegment.h @@ -24,6 +24,11 @@ void addInputSegment(InputChunk *inSeg); void finalizeInputSegments(); + // In most circumstances BSS segments don't need to be written + // to the output binary. However if the memory is imported, and + // we can't use memory.fill during startup (due to lack of bulk + // memory feature) then we include BSS segments verbatim. 
+ bool requiredInBinary() const { return !isBss || config->emitBssSegments; } bool isTLS() const { return name == ".tdata"; } diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -558,9 +558,10 @@ DataCountSection::DataCountSection(ArrayRef segments) : SyntheticSection(llvm::wasm::WASM_SEC_DATACOUNT), - numSegments(std::count_if( - segments.begin(), segments.end(), - [](OutputSegment *const segment) { return !segment->isBss; })) {} + numSegments(std::count_if(segments.begin(), segments.end(), + [](OutputSegment *const segment) { + return segment->requiredInBinary(); + })) {} void DataCountSection::writeBody() { writeUleb128(bodyOutputStream, numSegments, "data count"); @@ -716,7 +717,7 @@ unsigned numNames = 0; for (const OutputSegment *s : segments) - if (!s->name.empty() && !s->isBss) + if (!s->name.empty() && s->requiredInBinary()) ++numNames; return numNames; @@ -789,7 +790,7 @@ writeUleb128(sub.os, count, "name count"); for (OutputSegment *s : segments) { - if (!s->name.empty() && !s->isBss) { + if (!s->name.empty() && s->requiredInBinary()) { writeUleb128(sub.os, s->index, "global index"); writeStr(sub.os, s->name, "segment name"); } diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -73,6 +73,7 @@ void populateSymtab(); void populateProducers(); void populateTargetFeatures(); + void checkTargetFeatures(); void calculateInitFunctions(); void calculateImports(); void calculateExports(); @@ -478,25 +479,6 @@ if (!config->checkFeatures) return; - if (!config->relocatable && allowed.count("mutable-globals") == 0) { - for (const Symbol *sym : out.importSec->importedSymbols) { - if (auto *global = dyn_cast(sym)) { - if (global->getGlobalType()->Mutable) { - error(Twine("mutable global imported but 'mutable-globals' feature " - "not present in inputs: `") + - toString(*sym) + "`. 
Use --no-check-features to suppress."); - } - } - } - for (const Symbol *sym : out.exportSec->exportedSymbols) { - if (isa(sym)) { - error(Twine("mutable global exported but 'mutable-globals' feature " - "not present in inputs: `") + - toString(*sym) + "`. Use --no-check-features to suppress."); - } - } - } - if (config->sharedMemory) { if (disallowed.count("shared-mem")) error("--shared-memory is disallowed by " + disallowed["shared-mem"] + @@ -545,6 +527,37 @@ ". Use --no-check-features to suppress."); } } + + // Normally we don't include bss segments in the binary. In particular if + // memory is not being imported then we can assume it's zero initialized. + // In the case the memory is imported, and we can use the memory.fill + // instruction, then we can also avoid including the segments. + if (config->importMemory && !allowed.count("bulk-memory")) + config->emitBssSegments = true; +} + +void Writer::checkTargetFeatures() { + if (config->relocatable || !config->checkFeatures) + return; + + if (out.targetFeaturesSec->features.count("mutable-globals") == 0) { + for (const Symbol *sym : out.importSec->importedSymbols) { + if (auto *global = dyn_cast(sym)) { + if (global->getGlobalType()->Mutable) { + error(Twine("mutable global imported but 'mutable-globals' feature " + "not present in inputs: `") + + toString(*sym) + "`. Use --no-check-features to suppress."); + } + } + } + for (const Symbol *sym : out.exportSec->exportedSymbols) { + if (isa(sym)) { + error(Twine("mutable global exported but 'mutable-globals' feature " + "not present in inputs: `") + + toString(*sym) + "`. Use --no-check-features to suppress."); + } + } + } } static bool shouldImport(Symbol *sym) { @@ -844,11 +857,7 @@ OutputSegment *s = make(name); if (config->sharedMemory) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; - // Exported memories are guaranteed to be zero-initialized, so no need - // to emit data segments for bss sections. 
- // TODO: consider initializing bss sections with memory.fill - // instructions when memory is imported and bulk-memory is available. - if (!config->importMemory && !config->relocatable && name.startswith(".bss")) + if (!config->relocatable && name.startswith(".bss")) s->isBss = true; segments.push_back(s); return s; @@ -944,8 +953,14 @@ } bool Writer::needsPassiveInitialization(const OutputSegment *segment) { - return segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE && - !segment->isTLS() && !segment->isBss; + // TLS segments are initialized separately + if (segment->isTLS()) + return false; + // If bulk memory is supported then we can perform bss initialization + // (via memory.fill) during `__wasm_init_memory`. + if (config->importMemory && !segment->requiredInBinary()) + return true; + return segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE; } bool Writer::hasPassiveInitializedSegments() { @@ -963,7 +978,9 @@ // Passive segments are used to avoid memory being reinitialized on each // thread's instantiation. These passive segments are initialized and // dropped in __wasm_init_memory, which is registered as the start function - if (config->sharedMemory && hasPassiveInitializedSegments()) { + // We also initialize bss segments (using memory.fill) as part of this + // function. 
+ if (hasPassiveInitializedSegments()) { WasmSym::initMemory = symtab->addSyntheticFunction( "__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_init_memory")); @@ -1005,9 +1022,12 @@ void Writer::createInitMemoryFunction() { LLVM_DEBUG(dbgs() << "createInitMemoryFunction\n"); assert(WasmSym::initMemory); - assert(WasmSym::initMemoryFlag); assert(hasPassiveInitializedSegments()); - uint64_t flagAddress = WasmSym::initMemoryFlag->getVA(); + uint64_t flagAddress; + if (config->sharedMemory) { + assert(WasmSym::initMemoryFlag); + flagAddress = WasmSym::initMemoryFlag->getVA(); + } bool is64 = config->is64.getValueOr(false); std::string bodyContent; { @@ -1063,21 +1083,6 @@ // (i32.const $__init_memory_flag) // (i32.const 1) - // With PIC code we cache the flag address in local 0 - if (config->isPic) { - writeUleb128(os, 1, "num local decls"); - writeUleb128(os, 1, "local count"); - writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); - writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); - writePtrConst(os, flagAddress, is64, "flag address"); - writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); - writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set"); - writeUleb128(os, 0, "local 0"); - } else { - writeUleb128(os, 0, "num locals"); - } - auto writeGetFlagAddress = [&]() { if (config->isPic) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); @@ -1087,34 +1092,57 @@ } }; - // Set up destination blocks - writeU8(os, WASM_OPCODE_BLOCK, "block $drop"); - writeU8(os, WASM_TYPE_NORESULT, "block type"); - writeU8(os, WASM_OPCODE_BLOCK, "block $wait"); - writeU8(os, WASM_TYPE_NORESULT, "block type"); - writeU8(os, WASM_OPCODE_BLOCK, "block $init"); - writeU8(os, WASM_TYPE_NORESULT, "block type"); - - // Atomically check whether we win the race. 
- writeGetFlagAddress(); - writeI32Const(os, 0, "expected flag value"); - writeI32Const(os, 1, "new flag value"); - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg"); - writeMemArg(os, 2, 0); - - // Based on the value, decide what to do next. - writeU8(os, WASM_OPCODE_BR_TABLE, "br_table"); - writeUleb128(os, 2, "label vector length"); - writeUleb128(os, 0, "label $init"); - writeUleb128(os, 1, "label $wait"); - writeUleb128(os, 2, "default label $drop"); - - // Initialize passive data segments - writeU8(os, WASM_OPCODE_END, "end $init"); + if (config->sharedMemory) { + // With PIC code we cache the flag address in local 0 + if (config->isPic) { + writeUleb128(os, 1, "num local decls"); + writeUleb128(os, 1, "local count"); + writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); + writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); + writePtrConst(os, flagAddress, is64, "flag address"); + writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); + writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set"); + writeUleb128(os, 0, "local 0"); + } else { + writeUleb128(os, 0, "num locals"); + } + + // Set up destination blocks + writeU8(os, WASM_OPCODE_BLOCK, "block $drop"); + writeU8(os, WASM_TYPE_NORESULT, "block type"); + writeU8(os, WASM_OPCODE_BLOCK, "block $wait"); + writeU8(os, WASM_TYPE_NORESULT, "block type"); + writeU8(os, WASM_OPCODE_BLOCK, "block $init"); + writeU8(os, WASM_TYPE_NORESULT, "block type"); + + // Atomically check whether we win the race. + writeGetFlagAddress(); + writeI32Const(os, 0, "expected flag value"); + writeI32Const(os, 1, "new flag value"); + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg"); + writeMemArg(os, 2, 0); + + // Based on the value, decide what to do next. 
+ writeU8(os, WASM_OPCODE_BR_TABLE, "br_table"); + writeUleb128(os, 2, "label vector length"); + writeUleb128(os, 0, "label $init"); + writeUleb128(os, 1, "label $wait"); + writeUleb128(os, 2, "default label $drop"); + + // Initialize passive data segments + writeU8(os, WASM_OPCODE_END, "end $init"); + } else { + writeUleb128(os, 0, "num local decls"); + } + for (const OutputSegment *s : segments) { if (needsPassiveInitialization(s)) { - // destination address + // For passive BSS segments we can simply issue a memory.fill(0). + // For non-BSS segments we do a memory.init. Both these + // instructions take as their first argument the destination + // address. writePtrConst(os, s->startVA, is64, "destination address"); if (config->isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); @@ -1123,52 +1151,60 @@ writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "i32.add"); } - // source segment offset - writeI32Const(os, 0, "segment offset"); - // memory region size - writeI32Const(os, s->size, "memory region size"); - // memory.init instruction - writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); - writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init"); - writeUleb128(os, s->index, "segment index immediate"); - writeU8(os, 0, "memory index immediate"); + if (s->isBss) { + writeI32Const(os, 0, "fill value"); + writeI32Const(os, s->size, "memory region size"); + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_FILL, "memory.fill"); + writeU8(os, 0, "memory index immediate"); + } else { + writeI32Const(os, 0, "source segment offset"); + writeI32Const(os, s->size, "memory region size"); + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init"); + writeUleb128(os, s->index, "segment index immediate"); + writeU8(os, 0, "memory index immediate"); + } } } - // Set flag to 2 to mark end of initialization - writeGetFlagAddress(); - 
writeI32Const(os, 2, "flag value"); - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store"); - writeMemArg(os, 2, 0); - - // Notify any waiters that memory initialization is complete - writeGetFlagAddress(); - writeI32Const(os, -1, "number of waiters"); - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify"); - writeMemArg(os, 2, 0); - writeU8(os, WASM_OPCODE_DROP, "drop"); - - // Branch to drop the segments - writeU8(os, WASM_OPCODE_BR, "br"); - writeUleb128(os, 1, "label $drop"); - - // Wait for the winning thread to initialize memory - writeU8(os, WASM_OPCODE_END, "end $wait"); - writeGetFlagAddress(); - writeI32Const(os, 1, "expected flag value"); - writeI64Const(os, -1, "timeout"); - - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait"); - writeMemArg(os, 2, 0); - writeU8(os, WASM_OPCODE_DROP, "drop"); - - // Unconditionally drop passive data segments - writeU8(os, WASM_OPCODE_END, "end $drop"); + if (config->sharedMemory) { + // Set flag to 2 to mark end of initialization + writeGetFlagAddress(); + writeI32Const(os, 2, "flag value"); + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store"); + writeMemArg(os, 2, 0); + + // Notify any waiters that memory initialization is complete + writeGetFlagAddress(); + writeI32Const(os, -1, "number of waiters"); + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify"); + writeMemArg(os, 2, 0); + writeU8(os, WASM_OPCODE_DROP, "drop"); + + // Branch to drop the segments + writeU8(os, WASM_OPCODE_BR, "br"); + writeUleb128(os, 1, "label $drop"); + + // Wait for the winning thread to initialize memory + writeU8(os, WASM_OPCODE_END, "end $wait"); + writeGetFlagAddress(); + 
writeI32Const(os, 1, "expected flag value"); + writeI64Const(os, -1, "timeout"); + + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait"); + writeMemArg(os, 2, 0); + writeU8(os, WASM_OPCODE_DROP, "drop"); + + // Unconditionally drop passive data segments + writeU8(os, WASM_OPCODE_END, "end $drop"); + } + for (const OutputSegment *s : segments) { - if (needsPassiveInitialization(s)) { + if (needsPassiveInitialization(s) && !s->isBss) { // data.drop instruction writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); writeUleb128(os, WASM_OPCODE_DATA_DROP, "data.drop"); @@ -1495,6 +1531,8 @@ scanRelocations(); log("-- finalizeIndirectFunctionTable"); finalizeIndirectFunctionTable(); + log("-- populateTargetFeatures"); + populateTargetFeatures(); log("-- createSyntheticInitFunctions"); createSyntheticInitFunctions(); log("-- assignIndexes"); @@ -1539,12 +1577,12 @@ calculateTypes(); log("-- calculateExports"); calculateExports(); + log("-- checkTargetFeatures"); + checkTargetFeatures(); log("-- calculateCustomSections"); calculateCustomSections(); log("-- populateSymtab"); populateSymtab(); - log("-- populateTargetFeatures"); - populateTargetFeatures(); log("-- addSections"); addSections(); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -298,6 +298,7 @@ WASM_OPCODE_DROP = 0x1a, WASM_OPCODE_MISC_PREFIX = 0xfc, WASM_OPCODE_MEMORY_INIT = 0x08, + WASM_OPCODE_MEMORY_FILL = 0x0b, WASM_OPCODE_DATA_DROP = 0x09, WASM_OPCODE_ATOMICS_PREFIX = 0xfe, WASM_OPCODE_ATOMIC_NOTIFY = 0x00,