diff --git a/lld/test/wasm/custom-section-name.ll b/lld/test/wasm/custom-section-name.ll --- a/lld/test/wasm/custom-section-name.ll +++ b/lld/test/wasm/custom-section-name.ll @@ -1,8 +1,12 @@ ; RUN: llc -filetype=obj %s -o %t.o + ; RUN: wasm-ld -no-gc-sections --no-entry -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s --check-prefixes=CHECK,NO-BSS + ; RUN: wasm-ld -no-gc-sections --no-entry --import-memory -o %t.bss.wasm %t.o -; RUN: obj2yaml %t.bss.wasm | FileCheck %s --check-prefixes=CHECK,BSS +; RUN: obj2yaml %t.bss.wasm | FileCheck %s --check-prefix=CHECK +; RUN: llvm-objdump -d --no-show-raw-insn --no-leading-addr %t.bss.wasm | FileCheck %s --check-prefix=BSS + ; RUN: wasm-ld -no-gc-sections --no-entry -o %t_reloc.o %t.o --relocatable ; RUN: obj2yaml %t_reloc.o | FileCheck -check-prefix RELOC %s @@ -34,13 +38,16 @@ ; CHECK-NEXT: Opcode: I32_CONST ; CHECK-NEXT: Value: 1032 ; CHECK-NEXT: Content: '07000000' -; BSS-NEXT: - SectionOffset: 37 -; BSS-NEXT: InitFlags: 0 -; BSS-NEXT: Offset: -; BSS-NEXT: Opcode: I32_CONST -; BSS-NEXT: Value: 1036 -; BSS-NEXT: Content: '00000000' -; NO-BSS-NOT: - SectionOffset: +; CHECK-NOT: - SectionOffset: +; NO-BSS-NOT: __wasm_init_memory + +; BSS-LABEL: <__wasm_init_memory>: +; BSS-EMPTY: +; BSS-NEXT: i32.const 1036 +; BSS-NEXT: i32.const 0 +; BSS-NEXT: i32.const 4 +; BSS-NEXT: memory.fill 0 +; BSS-NEXT: end ; RELOC-LABEL: SegmentInfo: ; RELOC-NEXT: - Index: 0 diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -187,6 +187,13 @@ ; DIS-NEXT: i32.const 0 ; DIS-NEXT: i32.const 20 ; DIS-NEXT: memory.init 1, 0 +; NOPIC-DIS-NEXT: [[PTR]].const 1060 +; PIC-DIS-NEXT: [[PTR]].const 36 +; PIC-DIS-NEXT: global.get 1 +; PIC-DIS-NEXT: [[PTR]].add +; DIS-NEXT: i32.const 0 +; DIS-NEXT: i32.const 10000 +; DIS-NEXT: memory.fill 0 ; NOPIC-DIS-NEXT: [[PTR]].const 11060 ; PIC-DIS-NEXT: local.get 0 diff --git a/lld/test/wasm/shared-memory-bss.s 
b/lld/test/wasm/shared-memory-bss.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/shared-memory-bss.s @@ -0,0 +1,89 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --experimental-pic -shared --shared-memory -o %t.so %t.o +# RUN: llvm-objdump -d --no-show-raw-insn --no-leading-addr %t.so | FileCheck %s +# RUN: obj2yaml %t.so | FileCheck %s --check-prefix=YAML + +.section .bss.foo,"",@ +.globl foo +.p2align 2 +foo: + .int32 0 + .size foo, 4 + +.section .data.bar,"",@ +.globl bar +.p2align 2 +bar: + .int32 42 + .size bar, 4 + +.section .custom_section.target_features,"",@ + .int8 2 + .int8 43 + .int8 7 + .ascii "atomics" + .int8 43 + .int8 11 + .ascii "bulk-memory" + +# Verify that there is only a single data segment and no bss +# in the binary: + +# YAML: - Type: DATA{{$}} +# YAML-NEXT: Segments: +# YAML-NEXT: - SectionOffset: 3 +# YAML-NEXT: InitFlags: 1 +# YAML-NEXT: Content: 2A000000 +# YAML-NEXT: - Type: CUSTOM + +# CHECK: <__wasm_init_memory>: +# CHECK-NEXT: .local i32 +# CHECK-NEXT: global.get 0 +# CHECK-NEXT: i32.const 8 +# CHECK-NEXT: i32.add +# CHECK-NEXT: local.set 0 +# CHECK-NEXT: block +# CHECK-NEXT: block +# CHECK-NEXT: block +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: i32.const 1 +# CHECK-NEXT: i32.atomic.rmw.cmpxchg 0 +# CHECK-NEXT: br_table {0, 1, 2} # 1: down to label1 +# CHECK-NEXT: # 2: down to label0 +# CHECK-NEXT: end +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: global.get 0 +# CHECK-NEXT: i32.add +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: i32.const 4 + +# Regular data gets initialized with memory.init + +# CHECK-NEXT: memory.init 0, 0 +# CHECK-NEXT: i32.const 4 +# CHECK-NEXT: global.get 0 +# CHECK-NEXT: i32.add +# CHECK-NEXT: i32.const 0 +# CHECK-NEXT: i32.const 4 + +# BSS gets initialized with memory.fill + +# CHECK-NEXT: memory.fill 0 +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: i32.const 2 +# CHECK-NEXT: i32.atomic.store 0 +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: i32.const 
-1 +# CHECK-NEXT: memory.atomic.notify 0 +# CHECK-NEXT: drop +# CHECK-NEXT: br 1 # 1: down to label1 +# CHECK-NEXT: end +# CHECK-NEXT: local.get 0 +# CHECK-NEXT: i32.const 1 +# CHECK-NEXT: i64.const -1 +# CHECK-NEXT: memory.atomic.wait32 0 +# CHECK-NEXT: drop +# CHECK-NEXT: end +# CHECK-NEXT: data.drop 0 +# CHECK-NEXT: end diff --git a/lld/test/wasm/tls.s b/lld/test/wasm/tls.s --- a/lld/test/wasm/tls.s +++ b/lld/test/wasm/tls.s @@ -87,7 +87,7 @@ # CHECK-NEXT: Mutable: true # CHECK-NEXT: InitExpr: # CHECK-NEXT: Opcode: I32_CONST -# CHECK-NEXT: Value: 66576 +# CHECK-NEXT: Value: 66592 # __tls_base # CHECK-NEXT: - Index: 1 diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -846,9 +846,7 @@ s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; // Exported memories are guaranteed to be zero-initialized, so no need // to emit data segments for bss sections. - // TODO: consider initializing bss sections with memory.fill - // instructions when memory is imported and bulk-memory is available. - if (!config->importMemory && !config->relocatable && name.startswith(".bss")) + if (!config->relocatable && name.startswith(".bss")) s->isBss = true; segments.push_back(s); return s; @@ -944,8 +942,14 @@ } bool Writer::needsPassiveInitialization(const OutputSegment *segment) { - return segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE && - !segment->isTLS() && !segment->isBss; + // TLS segments are initialized separately + if (segment->isTLS()) + return false; + // Passive initialization of bss segments (via memory.fill) is required + // when memory is imported. + if (config->importMemory && segment->isBss) + return true; + return segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE; } bool Writer::hasPassiveInitializedSegments() { @@ -963,7 +967,9 @@ // Passive segments are used to avoid memory being reinitialized on each // thread's instantiation. 
These passive segments are initialized and // dropped in __wasm_init_memory, which is registered as the start function - if (config->sharedMemory && hasPassiveInitializedSegments()) { + // We also initialize bss segments (using memory.fill) as part of this + // function. + if (hasPassiveInitializedSegments()) { WasmSym::initMemory = symtab->addSyntheticFunction( "__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_init_memory")); @@ -1005,9 +1011,12 @@ void Writer::createInitMemoryFunction() { LLVM_DEBUG(dbgs() << "createInitMemoryFunction\n"); assert(WasmSym::initMemory); - assert(WasmSym::initMemoryFlag); assert(hasPassiveInitializedSegments()); - uint64_t flagAddress = WasmSym::initMemoryFlag->getVA(); + uint64_t flagAddress; + if (config->sharedMemory) { + assert(WasmSym::initMemoryFlag); + flagAddress = WasmSym::initMemoryFlag->getVA(); + } bool is64 = config->is64.getValueOr(false); std::string bodyContent; { @@ -1063,21 +1072,6 @@ // (i32.const $__init_memory_flag) // (i32.const 1) - // With PIC code we cache the flag address in local 0 - if (config->isPic) { - writeUleb128(os, 1, "num local decls"); - writeUleb128(os, 1, "local count"); - writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); - writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); - writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); - writePtrConst(os, flagAddress, is64, "flag address"); - writeU8(os, is64 ? 
WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); - writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set"); - writeUleb128(os, 0, "local 0"); - } else { - writeUleb128(os, 0, "num locals"); - } - auto writeGetFlagAddress = [&]() { if (config->isPic) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); @@ -1087,34 +1081,57 @@ } }; - // Set up destination blocks - writeU8(os, WASM_OPCODE_BLOCK, "block $drop"); - writeU8(os, WASM_TYPE_NORESULT, "block type"); - writeU8(os, WASM_OPCODE_BLOCK, "block $wait"); - writeU8(os, WASM_TYPE_NORESULT, "block type"); - writeU8(os, WASM_OPCODE_BLOCK, "block $init"); - writeU8(os, WASM_TYPE_NORESULT, "block type"); - - // Atomically check whether we win the race. - writeGetFlagAddress(); - writeI32Const(os, 0, "expected flag value"); - writeI32Const(os, 1, "new flag value"); - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg"); - writeMemArg(os, 2, 0); - - // Based on the value, decide what to do next. - writeU8(os, WASM_OPCODE_BR_TABLE, "br_table"); - writeUleb128(os, 2, "label vector length"); - writeUleb128(os, 0, "label $init"); - writeUleb128(os, 1, "label $wait"); - writeUleb128(os, 2, "default label $drop"); - - // Initialize passive data segments - writeU8(os, WASM_OPCODE_END, "end $init"); + if (config->sharedMemory) { + // With PIC code we cache the flag address in local 0 + if (config->isPic) { + writeUleb128(os, 1, "num local decls"); + writeUleb128(os, 1, "local count"); + writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type"); + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); + writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base"); + writePtrConst(os, flagAddress, is64, "flag address"); + writeU8(os, is64 ? 
WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add"); + writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set"); + writeUleb128(os, 0, "local 0"); + } else { + writeUleb128(os, 0, "num locals"); + } + + // Set up destination blocks + writeU8(os, WASM_OPCODE_BLOCK, "block $drop"); + writeU8(os, WASM_TYPE_NORESULT, "block type"); + writeU8(os, WASM_OPCODE_BLOCK, "block $wait"); + writeU8(os, WASM_TYPE_NORESULT, "block type"); + writeU8(os, WASM_OPCODE_BLOCK, "block $init"); + writeU8(os, WASM_TYPE_NORESULT, "block type"); + + // Atomically check whether we win the race. + writeGetFlagAddress(); + writeI32Const(os, 0, "expected flag value"); + writeI32Const(os, 1, "new flag value"); + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg"); + writeMemArg(os, 2, 0); + + // Based on the value, decide what to do next. + writeU8(os, WASM_OPCODE_BR_TABLE, "br_table"); + writeUleb128(os, 2, "label vector length"); + writeUleb128(os, 0, "label $init"); + writeUleb128(os, 1, "label $wait"); + writeUleb128(os, 2, "default label $drop"); + + // Initialize passive data segments + writeU8(os, WASM_OPCODE_END, "end $init"); + } else { + writeUleb128(os, 0, "num local decls"); + } + for (const OutputSegment *s : segments) { if (needsPassiveInitialization(s)) { - // destination address + // For passive BSS segments we can simply issue a memory.fill(0). + // For non-BSS segments we do a memory.init. Both these + // instructions take as their first argument the destination + // address. writePtrConst(os, s->startVA, is64, "destination address"); if (config->isPic) { writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); @@ -1123,52 +1140,60 @@ writeU8(os, is64 ? 
WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "i32.add"); } - // source segment offset - writeI32Const(os, 0, "segment offset"); - // memory region size - writeI32Const(os, s->size, "memory region size"); - // memory.init instruction - writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); - writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init"); - writeUleb128(os, s->index, "segment index immediate"); - writeU8(os, 0, "memory index immediate"); + if (s->isBss) { + writeI32Const(os, 0, "fill value"); + writeI32Const(os, s->size, "memory region size"); + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_FILL, "memory.fill"); + writeU8(os, 0, "memory index immediate"); + } else { + writeI32Const(os, 0, "source segment offset"); + writeI32Const(os, s->size, "memory region size"); + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init"); + writeUleb128(os, s->index, "segment index immediate"); + writeU8(os, 0, "memory index immediate"); + } } } - // Set flag to 2 to mark end of initialization - writeGetFlagAddress(); - writeI32Const(os, 2, "flag value"); - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store"); - writeMemArg(os, 2, 0); - - // Notify any waiters that memory initialization is complete - writeGetFlagAddress(); - writeI32Const(os, -1, "number of waiters"); - writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify"); - writeMemArg(os, 2, 0); - writeU8(os, WASM_OPCODE_DROP, "drop"); - - // Branch to drop the segments - writeU8(os, WASM_OPCODE_BR, "br"); - writeUleb128(os, 1, "label $drop"); - - // Wait for the winning thread to initialize memory - writeU8(os, WASM_OPCODE_END, "end $wait"); - writeGetFlagAddress(); - writeI32Const(os, 1, "expected flag value"); - writeI64Const(os, -1, "timeout"); - - 
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); - writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait"); - writeMemArg(os, 2, 0); - writeU8(os, WASM_OPCODE_DROP, "drop"); - - // Unconditionally drop passive data segments - writeU8(os, WASM_OPCODE_END, "end $drop"); + if (config->sharedMemory) { + // Set flag to 2 to mark end of initialization + writeGetFlagAddress(); + writeI32Const(os, 2, "flag value"); + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store"); + writeMemArg(os, 2, 0); + + // Notify any waiters that memory initialization is complete + writeGetFlagAddress(); + writeI32Const(os, -1, "number of waiters"); + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify"); + writeMemArg(os, 2, 0); + writeU8(os, WASM_OPCODE_DROP, "drop"); + + // Branch to drop the segments + writeU8(os, WASM_OPCODE_BR, "br"); + writeUleb128(os, 1, "label $drop"); + + // Wait for the winning thread to initialize memory + writeU8(os, WASM_OPCODE_END, "end $wait"); + writeGetFlagAddress(); + writeI32Const(os, 1, "expected flag value"); + writeI64Const(os, -1, "timeout"); + + writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix"); + writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait"); + writeMemArg(os, 2, 0); + writeU8(os, WASM_OPCODE_DROP, "drop"); + + // Unconditionally drop passive data segments + writeU8(os, WASM_OPCODE_END, "end $drop"); + } + for (const OutputSegment *s : segments) { - if (needsPassiveInitialization(s)) { + if (needsPassiveInitialization(s) && !s->isBss) { // data.drop instruction writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); writeUleb128(os, WASM_OPCODE_DATA_DROP, "data.drop"); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -298,6 +298,7 
@@ WASM_OPCODE_DROP = 0x1a, WASM_OPCODE_MISC_PREFIX = 0xfc, WASM_OPCODE_MEMORY_INIT = 0x08, + WASM_OPCODE_MEMORY_FILL = 0x0b, WASM_OPCODE_DATA_DROP = 0x09, WASM_OPCODE_ATOMICS_PREFIX = 0xfe, WASM_OPCODE_ATOMIC_NOTIFY = 0x00,