diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -29,6 +29,9 @@ TARGET_BUILTIN(__builtin_wasm_memory_init, "vIUiIUiv*UiUi", "", "bulk-memory") TARGET_BUILTIN(__builtin_wasm_data_drop, "vIUi", "", "bulk-memory") +// Thread-local storage +TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory") + // Floating point min/max BUILTIN(__builtin_wasm_min_f32, "fff", "nc") BUILTIN(__builtin_wasm_max_f32, "fff", "nc") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13888,6 +13888,11 @@ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); return Builder.CreateCall(Callee, {Arg}); } + case WebAssembly::BI__builtin_wasm_tls_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -38,6 +38,12 @@ // WEBASSEMBLY64: call void @llvm.wasm.data.drop(i32 3) } +__SIZE_TYPE__ tls_size() { + return __builtin_wasm_tls_size(); + // WEBASSEMBLY32: call i32 @llvm.wasm.tls.size.i32() + // WEBASSEMBLY64: call i64 @llvm.wasm.tls.size.i64() +} + void throw(void *obj) { return __builtin_wasm_throw(0, obj); // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 0, i8* %{{.*}}) diff --git a/lld/test/wasm/data-layout.ll b/lld/test/wasm/data-layout.ll --- a/lld/test/wasm/data-layout.ll +++ b/lld/test/wasm/data-layout.ll @@ -27,13 +27,18 @@ ; CHECK-NEXT: InitExpr: ; CHECK-NEXT: Opcode: 
I32_CONST ; CHECK-NEXT: Value: 66608 -; CHECK-NEXT: - Index: 1 + +; We skip __tls_base and __tls_size. +; The CHECK-NOT ensures that we do not go to the next section. +; CHECK-NOT: - Type: + +; CHECK: - Index: 3 ; CHECK-NEXT: Type: I32 ; CHECK-NEXT: Mutable: false ; CHECK-NEXT: InitExpr: ; CHECK-NEXT: Opcode: I32_CONST ; CHECK-NEXT: Value: 1071 -; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: - Index: 4 ; CHECK-NEXT: Type: I32 ; CHECK-NEXT: Mutable: false ; CHECK-NEXT: InitExpr: diff --git a/lld/test/wasm/data-segment-merging.ll b/lld/test/wasm/data-segment-merging.ll --- a/lld/test/wasm/data-segment-merging.ll +++ b/lld/test/wasm/data-segment-merging.ll @@ -28,6 +28,8 @@ ; MERGE-NEXT: FunctionNames: ; MERGE-NEXT: - Index: 0 ; MERGE-NEXT: Name: __wasm_call_ctors +; MERGE-NEXT: - Index: 1 +; MERGE-NEXT: Name: __wasm_init_tls ; MERGE-NOT: - Index: ; RUN: wasm-ld -no-gc-sections --no-entry --no-merge-data-segments -o %t.separate.wasm %t.o @@ -65,6 +67,8 @@ ; SEPARATE-NEXT: FunctionNames: ; SEPARATE-NEXT: - Index: 0 ; SEPARATE-NEXT: Name: __wasm_call_ctors +; SEPARATE-NEXT: - Index: 1 +; SEPARATE-NEXT: Name: __wasm_init_tls ; SEPARATE-NOT: - Index: ; RUN: wasm-ld -no-gc-sections --no-entry --passive-segments -o %t.merged.passive.wasm %t.o @@ -87,6 +91,8 @@ ; PASSIVE-MERGE-NEXT: Name: __wasm_call_ctors ; PASSIVE-MERGE-NEXT: - Index: 1 ; PASSIVE-MERGE-NEXT: Name: __wasm_init_memory +; PASSIVE-MERGE-NEXT: - Index: 2 +; PASSIVE-MERGE-NEXT: Name: __wasm_init_tls ; PASSIVE-MERGE-NOT: - Index: ; RUN: wasm-ld -no-gc-sections --no-entry --passive-segments -no-merge-data-segments -o %t.separate.passive.wasm %t.o @@ -121,4 +127,6 @@ ; PASSIVE-SEPARATE-NEXT: Name: __wasm_call_ctors ; PASSIVE-SEPARATE-NEXT: - Index: 1 ; PASSIVE-SEPARATE-NEXT: Name: __wasm_init_memory +; PASSIVE-SEPARATE-NEXT: - Index: 2 +; PASSIVE-SEPARATE-NEXT: Name: __wasm_init_tls ; PASSIVE-SEPARATE-NOT: - Index diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll --- 
a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -54,6 +54,9 @@ ; ACTIVE-NEXT: - Index: 0 ; ACTIVE-NEXT: Locals: [] ; ACTIVE-NEXT: Body: 0B +; ACTIVE-NEXT: - Index: 1 +; ACTIVE-NEXT: Locals: [] +; ACTIVE-NEXT: Body: 0B ; ACTIVE-NEXT: - Type: DATA ; ACTIVE-NEXT: Segments: ; ACTIVE-NEXT: - SectionOffset: 7 @@ -73,6 +76,8 @@ ; ACTIVE-NEXT: FunctionNames: ; ACTIVE-NEXT: - Index: 0 ; ACTIVE-NEXT: Name: __wasm_call_ctors +; ACTIVE-NEXT: - Index: 1 +; ACTIVE-NEXT: Name: __wasm_init_tls ; PASSIVE-LABEL: - Type: CODE ; PASSIVE-NEXT: Functions: @@ -82,6 +87,9 @@ ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Locals: [] ; PASSIVE-NEXT: Body: 4180084100411CFC080000FC0900419C084100410DFC080100FC09010B +; PASSIVE-NEXT: - Index: 2 +; PASSIVE-NEXT: Locals: [] +; PASSIVE-NEXT: Body: 0B ; PASSIVE-NEXT: - Type: DATA ; PASSIVE-NEXT: Segments: ; PASSIVE-NEXT: - SectionOffset: 3 @@ -97,3 +105,5 @@ ; PASSIVE-NEXT: Name: __wasm_call_ctors ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Name: __wasm_init_memory +; PASSIVE-NEXT: - Index: 2 +; PASSIVE-NEXT: Name: __wasm_init_tls diff --git a/lld/test/wasm/gc-imports.ll b/lld/test/wasm/gc-imports.ll --- a/lld/test/wasm/gc-imports.ll +++ b/lld/test/wasm/gc-imports.ll @@ -83,9 +83,11 @@ ; NO-GC-NEXT: - Index: 2 ; NO-GC-NEXT: Name: __wasm_call_ctors ; NO-GC-NEXT: - Index: 3 -; NO-GC-NEXT: Name: foo +; NO-GC-NEXT: Name: __wasm_init_tls ; NO-GC-NEXT: - Index: 4 -; NO-GC-NEXT: Name: _start +; NO-GC-NEXT: Name: foo ; NO-GC-NEXT: - Index: 5 +; NO-GC-NEXT: Name: _start +; NO-GC-NEXT: - Index: 6 ; NO-GC-NEXT: Name: use_undef_global ; NO-GC-NEXT: ... 
diff --git a/lld/test/wasm/gc-sections.ll b/lld/test/wasm/gc-sections.ll --- a/lld/test/wasm/gc-sections.ll +++ b/lld/test/wasm/gc-sections.ll @@ -92,13 +92,17 @@ ; NO-GC-NEXT: ReturnType: NORESULT ; NO-GC-NEXT: ParamTypes: ; NO-GC-NEXT: - Index: 1 +; NO-GC-NEXT: ReturnType: NORESULT +; NO-GC-NEXT: ParamTypes: +; NO-GC-NEXT: - I32 +; NO-GC-NEXT: - Index: 2 ; NO-GC-NEXT: ReturnType: I64 ; NO-GC-NEXT: ParamTypes: ; NO-GC-NEXT: - I64 -; NO-GC-NEXT: - Index: 2 +; NO-GC-NEXT: - Index: 3 ; NO-GC-NEXT: ReturnType: I32 ; NO-GC-NEXT: ParamTypes: -; NO-GC-NEXT: - Index: 3 +; NO-GC-NEXT: - Index: 4 ; NO-GC-NEXT: ReturnType: I64 ; NO-GC-NEXT: ParamTypes: ; NO-GC-NEXT: - Type: FUNCTION @@ -112,12 +116,24 @@ ; NO-GC-NEXT: Opcode: I32_CONST ; NO-GC-NEXT: Value: 66576 ; NO-GC-NEXT: - Index: 1 +; NO-GC-NEXT: Type: I32 +; NO-GC-NEXT: Mutable: true +; NO-GC-NEXT: InitExpr: +; NO-GC-NEXT: Opcode: I32_CONST +; NO-GC-NEXT: Value: 0 +; NO-GC-NEXT: - Index: 2 +; NO-GC-NEXT: Type: I32 +; NO-GC-NEXT: Mutable: false +; NO-GC-NEXT: InitExpr: +; NO-GC-NEXT: Opcode: I32_CONST +; NO-GC-NEXT: Value: 0 +; NO-GC-NEXT: - Index: 3 ; NO-GC-NEXT: Type: I64 ; NO-GC-NEXT: Mutable: true ; NO-GC-NEXT: InitExpr: ; NO-GC-NEXT: Opcode: I64_CONST ; NO-GC-NEXT: Value: 123 -; NO-GC-NEXT: - Index: 2 +; NO-GC-NEXT: - Index: 4 ; NO-GC-NEXT: Type: I64 ; NO-GC-NEXT: Mutable: true ; NO-GC-NEXT: InitExpr: @@ -138,12 +154,14 @@ ; NO-GC-NEXT: - Index: 0 ; NO-GC-NEXT: Name: __wasm_call_ctors ; NO-GC-NEXT: - Index: 1 -; NO-GC-NEXT: Name: unused_function +; NO-GC-NEXT: Name: __wasm_init_tls ; NO-GC-NEXT: - Index: 2 -; NO-GC-NEXT: Name: used_function +; NO-GC-NEXT: Name: unused_function ; NO-GC-NEXT: - Index: 3 -; NO-GC-NEXT: Name: _start +; NO-GC-NEXT: Name: used_function ; NO-GC-NEXT: - Index: 4 +; NO-GC-NEXT: Name: _start +; NO-GC-NEXT: - Index: 5 ; NO-GC-NEXT: Name: use_global ; NO-GC-NEXT: ... 
diff --git a/lld/test/wasm/load-undefined.test b/lld/test/wasm/load-undefined.test --- a/lld/test/wasm/load-undefined.test +++ b/lld/test/wasm/load-undefined.test @@ -16,10 +16,12 @@ ; CHECK-NEXT: - Index: 0 ; CHECK-NEXT: Name: __wasm_call_ctors ; CHECK-NEXT: - Index: 1 -; CHECK-NEXT: Name: _start +; CHECK-NEXT: Name: __wasm_init_tls ; CHECK-NEXT: - Index: 2 -; CHECK-NEXT: Name: ret64 +; CHECK-NEXT: Name: _start ; CHECK-NEXT: - Index: 3 +; CHECK-NEXT: Name: ret64 +; CHECK-NEXT: - Index: 4 ; CHECK-NEXT: Name: ret32 ; CHECK-NEXT: ... @@ -28,8 +30,10 @@ ; NO-LOAD-NEXT: - Index: 0 ; NO-LOAD-NEXT: Name: __wasm_call_ctors ; NO-LOAD-NEXT: - Index: 1 -; NO-LOAD-NEXT: Name: _start +; NO-LOAD-NEXT: Name: __wasm_init_tls ; NO-LOAD-NEXT: - Index: 2 +; NO-LOAD-NEXT: Name: _start +; NO-LOAD-NEXT: - Index: 3 ; NO-LOAD-NEXT: Name: ret64 ; NO-LOAD-NEXT: ... diff --git a/lld/test/wasm/tls.ll b/lld/test/wasm/tls.ll new file mode 100644 --- /dev/null +++ b/lld/test/wasm/tls.ll @@ -0,0 +1,80 @@ +; RUN: llc -mattr=+bulk-memory -filetype=obj %s -o %t.o + +target triple = "wasm32-unknown-unknown" + +@tls1 = thread_local(localexec) global i32 1, align 4 +@tls2 = thread_local(localexec) global i32 1, align 4 + +define i32* @tls1_addr() { + ret i32* @tls1 +} + +define i32* @tls2_addr() { + ret i32* @tls2 +} + +; RUN: wasm-ld -no-gc-sections --allow-undefined --no-entry -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +; RUN: wasm-ld -no-gc-sections --no-merge-data-segments --allow-undefined --no-entry -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +; CHECK: - Type: GLOBAL +; CHECK-NEXT: Globals: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 66576 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Type: I32 +; 
CHECK-NEXT: Mutable: false +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 8 + + +; CHECK: - Type: CODE +; CHECK-NEXT: Functions: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 0B +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 20002401200041004108FC0800000B + +; Expected body of __wasm_init_tls: +; local.get 0 +; global.set 1 +; local.get 0 +; i32.const 0 +; i32.const 8 +; memory.init 0, 0 +; end + +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 2381808080004180808080006A0B + +; Expected body of tls1_addr: +; global.get 1 +; i32.const 0 +; i32.add +; end + +; CHECK-NEXT: - Index: 3 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 2381808080004184808080006A0B + +; Expected body of tls2_addr: +; global.get 1 +; i32.const 4 +; i32.add +; end diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -454,6 +454,7 @@ // Create ABI-defined synthetic symbols static void createSyntheticSymbols() { static WasmSignature nullSignature = {{}, {}}; + static WasmSignature i32ArgSignature = {{}, {ValType::I32}}; static llvm::wasm::WasmGlobalType globalTypeI32 = {WASM_TYPE_I32, false}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI32 = {WASM_TYPE_I32, true}; @@ -516,6 +517,28 @@ WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base"); } + llvm::wasm::WasmGlobal tlsBaseGlobal; + tlsBaseGlobal.Type = {WASM_TYPE_I32, true}; + tlsBaseGlobal.InitExpr.Value.Int32 = 0; + tlsBaseGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + tlsBaseGlobal.SymbolName = "__tls_base"; + WasmSym::tlsBase = + symtab->addSyntheticGlobal("__tls_base", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(tlsBaseGlobal, nullptr)); + + llvm::wasm::WasmGlobal tlsSizeGlobal; + tlsSizeGlobal.Type = {WASM_TYPE_I32, false}; + tlsSizeGlobal.InitExpr.Value.Int32 = 0; + tlsSizeGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + tlsSizeGlobal.SymbolName = 
"__tls_size"; + WasmSym::tlsSize = + symtab->addSyntheticGlobal("__tls_size", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(tlsSizeGlobal, nullptr)); + + WasmSym::initTLS = symtab->addSyntheticFunction( + "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(i32ArgSignature, "__wasm_init_tls")); + WasmSym::dsoHandle = symtab->addSyntheticDataSymbol( "__dso_handle", WASM_SYMBOL_VISIBILITY_HIDDEN); } diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -426,6 +426,15 @@ // linear memory. static GlobalSymbol *stackPointer; + // __tls_base + // Global that holds the address of the base of the current thread's + // TLS block. + static GlobalSymbol *tlsBase; + + // __tls_size + // Symbol whose value is the size of the TLS block. + static GlobalSymbol *tlsSize; + // __data_end // Symbol marking the end of the data and bss. static DefinedData *dataEnd; @@ -448,6 +457,10 @@ // Function that applies relocations to data segment post-instantiation. static DefinedFunction *applyRelocs; + // __wasm_init_tls + // Function that allocates thread-local storage and initializes it. 
+ static DefinedFunction *initTLS; + // __dso_handle // Symbol used in calls to __cxa_atexit to determine current DLL static DefinedData *dsoHandle; diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -27,11 +27,14 @@ DefinedFunction *WasmSym::callCtors; DefinedFunction *WasmSym::initMemory; DefinedFunction *WasmSym::applyRelocs; +DefinedFunction *WasmSym::initTLS; DefinedData *WasmSym::dsoHandle; DefinedData *WasmSym::dataEnd; DefinedData *WasmSym::globalBase; DefinedData *WasmSym::heapBase; GlobalSymbol *WasmSym::stackPointer; +GlobalSymbol *WasmSym::tlsBase; +GlobalSymbol *WasmSym::tlsSize; UndefinedGlobal *WasmSym::tableBase; UndefinedGlobal *WasmSym::memoryBase; @@ -200,8 +203,14 @@ uint32_t DefinedData::getVirtualAddress() const { LLVM_DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n"); - if (segment) + if (segment) { + // For thread local data, the symbol location is relative to the start of + // the .tdata section, since they are used as offsets from __tls_base. + // Hence, we do not add in segment->outputSeg->startVA. + if (segment->outputSeg->name == ".tdata") + return segment->outputSegmentOffset + offset; return segment->outputSeg->startVA + segment->outputSegmentOffset + offset; + } return offset; } diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -57,6 +57,7 @@ void createInitMemoryFunction(); void createApplyRelocationsFunction(); void createCallCtorsFunction(); + void createInitTLSFunction(); void assignIndexes(); void populateSymtab(); @@ -242,6 +243,11 @@ log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", seg->name, memoryPtr, seg->size, seg->alignment)); memoryPtr += seg->size; + + if (WasmSym::tlsSize && seg->name == ".tdata") { + auto *tlsSize = cast(WasmSym::tlsSize); + tlsSize->global->global.InitExpr.Value.Int32 = seg->size; + } } // TODO: Add .bss space here. 
@@ -353,6 +359,7 @@ StringMap used; StringMap required; StringMap disallowed; + bool tlsUsed = false; // Only infer used features if user did not specify features bool inferFeatures = !config->features.hasValue(); @@ -385,6 +392,14 @@ std::to_string(feature.Prefix)); } } + + for (InputSegment *segment : file->segments) { + if (!segment->live) + continue; + StringRef name = segment->getName(); + if (name.startswith(".tdata.") || name.startswith(".tbss.")) + tlsUsed = true; + } } if (inferFeatures) @@ -411,6 +426,10 @@ error("'bulk-memory' feature must be used in order to emit passive " "segments"); + if (!used.count("bulk-memory") && tlsUsed) + error("'bulk-memory' feature must be used in order to use thread-local " + "storage"); + // Validate that used features are allowed in output if (!inferFeatures) { for (auto &feature : used.keys()) { @@ -493,7 +512,7 @@ // See: https://github.com/WebAssembly/mutable-global if (g->getGlobalType()->Mutable) { // Only the __stack_pointer should ever be create as mutable. - assert(g == WasmSym::stackPointer); + assert(g == WasmSym::stackPointer || g == WasmSym::tlsBase); continue; } export_ = {name, WASM_EXTERNAL_GLOBAL, g->getGlobalIndex()}; @@ -602,6 +621,13 @@ // we only have a single __memory_base to use as our base address. if (config->isPic) return ".data"; + // We only support one thread-local segment, so we must merge the segments + // despite --no-merge-data-segments. + if (name.startswith(".tdata.")) + return ".tdata"; + // Merge .tbss into .tdata so that they share the same offsets. 
+ if (name.startswith(".tbss.")) + return ".tdata"; if (!config->mergeDataSegments) return name; if (name.startswith(".text.")) @@ -625,7 +651,7 @@ if (s == nullptr) { LLVM_DEBUG(dbgs() << "new segment: " << name << "\n"); s = make(name, segments.size()); - if (config->passiveSegments) + if (config->passiveSegments || name == ".tdata") s->initFlags = WASM_SEGMENT_IS_PASSIVE; segments.push_back(s); } @@ -655,7 +681,7 @@ // initialize passive data segments for (const OutputSegment *s : segments) { - if (s->initFlags & WASM_SEGMENT_IS_PASSIVE) { + if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") { // destination address writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); writeUleb128(os, s->startVA, "destination address"); @@ -737,6 +763,59 @@ createFunction(WasmSym::callCtors, bodyContent); } +// Emit the body of __wasm_init_tls: store the i32 argument into __tls_base, +// then memory.init the ".tdata" segment at that address. If there is no +// ".tdata" segment, the function body is empty. +void Writer::createInitTLSFunction() { + if (!WasmSym::initTLS || !WasmSym::initTLS->isLive()) + return; + + std::string bodyContent; + { + raw_string_ostream os(bodyContent); + + OutputSegment *tlsSeg = nullptr; + // Scan every output segment; stop only once ".tdata" has been found. + for (auto *seg : segments) { + if (seg->name == ".tdata") { + tlsSeg = seg; + break; + } + } + + if (tlsSeg) { + writeUleb128(os, 0, "num locals"); + + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + + writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); + writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index"); + + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + + writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); + writeUleb128(os, 0, "segment offset"); + + writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); + writeUleb128(os, tlsSeg->size, "memory region size"); + + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "MEMORY.INIT"); + writeUleb128(os, tlsSeg->index, "segment index immediate"); + writeU8(os, 0, "memory index immediate"); + + writeU8(os, WASM_OPCODE_END, "end function"); + } else { + writeUleb128(os, 0, 
"num locals"); + writeU8(os, WASM_OPCODE_END, "end function"); + } + } + + createFunction(WasmSym::initTLS, bodyContent); +} + // Populate InitFunctions vector with init functions from all input objects. // This is then used either when creating the output linking section or to // synthesize the "__wasm_call_ctors" function. @@ -829,6 +908,11 @@ createCallCtorsFunction(); } + createInitTLSFunction(); + + if (errorCount()) + return; + log("-- calculateTypes"); calculateTypes(); log("-- calculateExports"); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -242,7 +242,9 @@ enum : unsigned { WASM_OPCODE_END = 0x0b, WASM_OPCODE_CALL = 0x10, + WASM_OPCODE_LOCAL_GET = 0x20, WASM_OPCODE_GLOBAL_GET = 0x23, + WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, WASM_OPCODE_I32_CONST = 0x41, WASM_OPCODE_I64_CONST = 0x42, diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -124,4 +124,13 @@ [llvm_i32_ty], [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>; +//===----------------------------------------------------------------------===// +// Thread-local storage intrinsics +//===----------------------------------------------------------------------===// + +def int_wasm_tls_size : + Intrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; + } // TargetPrefix = "wasm" diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h --- a/llvm/include/llvm/MC/MCSectionWasm.h +++ b/llvm/include/llvm/MC/MCSectionWasm.h @@ -66,7 +66,8 @@ bool isVirtualSection() const override; bool isWasmData() const { - return Kind.isGlobalWriteableData() || Kind.isReadOnly(); + return Kind.isGlobalWriteableData() || Kind.isReadOnly() || + Kind.isThreadLocal(); } bool 
isUnique() const { return UniqueID != ~0U; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -233,6 +233,8 @@ return false; if (Addr.getGlobalValue()) return false; + if (GV->isThreadLocal()) + return false; Addr.setGlobalValue(GV); return true; } @@ -614,6 +616,8 @@ if (const GlobalValue *GV = dyn_cast(C)) { if (TLI.isPositionIndependent()) return 0; + if (GV->isThreadLocal()) + return 0; unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass : &WebAssembly::I32RegClass); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -32,5 +32,7 @@ HANDLE_NODETYPE(THROW) HANDLE_NODETYPE(MEMORY_COPY) HANDLE_NODETYPE(MEMORY_FILL) +HANDLE_NODETYPE(GLOBAL_GET) +HANDLE_NODETYPE(CONST) // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -99,6 +99,7 @@ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -69,6 +69,7 @@ computeRegisterProperties(Subtarget->getRegisterInfo()); setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom); setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); setOperationAction(ISD::BlockAddress, MVTPtr, Custom); @@ -937,6 +938,8 @@ return LowerFrameIndex(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::JumpTable: @@ -1087,6 +1090,31 @@ GA->getOffset(), OperandFlags)); } +SDValue +WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + const auto *GA = cast(Op); + + assert(!Subtarget->hasAddr64() && "Should define and use GLOBAL_GET_I64"); + + if (GA->getGlobal()->getThreadLocalMode() != GlobalValue::LocalExecTLSModel) { + fail(DL, DAG, "Only 
-ftls-model=local-exec is supported for now"); + return SDValue(); + } + + SDValue TLSBase = + DAG.getNode(WebAssemblyISD::GLOBAL_GET, DL, MVT::i32, + DAG.getTargetExternalSymbol("__tls_base", MVT::i32)); + + SDValue TLSOffset = DAG.getNode( + WebAssemblyISD::CONST, DL, VT, + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(), 0)); + + return DAG.getNode(ISD::ADD, DL, VT, TLSBase, TLSOffset); +} + SDValue WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { @@ -1200,6 +1228,15 @@ Op.getOperand(3) // thrown value }); } + + case Intrinsic::wasm_tls_size: { + if (Subtarget->hasAddr64()) { + fail(DL, DAG, "__builtin_wasm_tls_size is not yet supported on wasm64"); + return SDValue(); + } + return DAG.getNode(WebAssemblyISD::GLOBAL_GET, DL, MVT::i32, + DAG.getTargetExternalSymbol("__tls_size", MVT::i32)); + } } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -80,6 +80,10 @@ def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyGlobalGet : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_WebAssemblyConst : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Nodes. 
@@ -113,6 +117,11 @@ SDT_WebAssemblyWrapperPIC>; def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow, [SDNPHasChain, SDNPVariadic]>; +def WebAssemblyglobalget : SDNode<"WebAssemblyISD::GLOBAL_GET", + SDT_WebAssemblyGlobalGet, + [SDNPMayLoad]>; +def WebAssemblyconst : SDNode<"WebAssemblyISD::CONST", + SDT_WebAssemblyConst>; //===----------------------------------------------------------------------===// // WebAssembly-specific Operands. @@ -333,6 +342,12 @@ def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>; def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>; +def : Pat<(i32 (WebAssemblyglobalget texternalsym:$idx)), + (GLOBAL_GET_I32 texternalsym:$idx)>; + +def : Pat<(i32 (WebAssemblyconst tglobaltlsaddr:$idx)), + (CONST_I32 tglobaltlsaddr:$idx)>; + //===----------------------------------------------------------------------===// // Additional sets of instructions. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -77,9 +77,11 @@ // functions. It's OK to hardcode knowledge of specific symbols here; this // method is precisely there for fetching the signatures of known // Clang-provided symbols. 
- if (strcmp(Name, "__stack_pointer") == 0 || - strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0) { - bool Mutable = strcmp(Name, "__stack_pointer") == 0; + if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 || + strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 || + strcmp(Name, "__tls_size") == 0) { + bool Mutable = + strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -187,10 +187,11 @@ replaceFeatures(F, FeatureStr); bool Stripped = false; - if (!Features[WebAssembly::FeatureAtomics]) { + if (!Features[WebAssembly::FeatureAtomics]) Stripped |= stripAtomics(M); + + if (!Features[WebAssembly::FeatureBulkMemory]) Stripped |= stripThreadLocals(M); - } recordFeatures(M, Features, Stripped); diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS -; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS +; RUN: llc < %s -mattr=-bulk-memory | FileCheck %s --check-prefixes NO-BULK-MEM +; RUN: llc < %s -mattr=+bulk-memory | FileCheck %s --check-prefixes BULK-MEM ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. 
@@ -9,18 +9,19 @@ @foo = internal thread_local global i32 0 -; CHECK-LABEL: .custom_section.target_features,"",@ -; -atomics -; NO-ATOMICS-NEXT: .int8 1 -; NO-ATOMICS-NEXT: .int8 45 -; NO-ATOMICS-NEXT: .int8 7 -; NO-ATOMICS-NEXT: .ascii "atomics" -; NO-ATOMICS-NEXT: .bss.foo,"",@ +; -bulk-memory +; NO-BULK-MEM-LABEL: .custom_section.target_features,"",@ +; NO-BULK-MEM-NEXT: .int8 1 +; NO-BULK-MEM-NEXT: .int8 45 +; NO-BULK-MEM-NEXT: .int8 7 +; NO-BULK-MEM-NEXT: .ascii "atomics" +; NO-BULK-MEM-NEXT: .bss.foo,"",@ -; +atomics -; ATOMICS-NEXT: .int8 1 -; ATOMICS-NEXT: .int8 43 -; ATOMICS-NEXT: .int8 7 -; ATOMICS-NEXT: .ascii "atomics" -; ATOMICS-NEXT: .tbss.foo,"",@ +; +bulk-memory +; BULK-MEM-LABEL: .custom_section.target_features,"",@ +; BULK-MEM-NEXT: .int8 1 +; BULK-MEM-NEXT: .int8 43 +; BULK-MEM-NEXT: .int8 11 +; BULK-MEM-NEXT: .ascii "bulk-memory" +; BULK-MEM-NEXT: .tbss.foo,"",@ diff --git a/llvm/test/CodeGen/WebAssembly/tls.ll b/llvm/test/CodeGen/WebAssembly/tls.ll --- a/llvm/test/CodeGen/WebAssembly/tls.ll +++ b/llvm/test/CodeGen/WebAssembly/tls.ll @@ -1,17 +1,61 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory -fast-isel | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; SINGLE-LABEL: address_of_tls: +; CHECK-LABEL: address_of_tls: define i32 @address_of_tls() { - ; SINGLE: i32.const $push0=, tls - ; SINGLE-NEXT: return $pop0 + ; CHECK-DAG: global.get __tls_base + ; CHECK-DAG: i32.const tls + ; CHECK-NEXT: i32.add + ; CHECK-NEXT: return ret i32 ptrtoint(i32* @tls to i32) } -; SINGLE: .type tls,@object -; SINGLE-NEXT: .section 
.bss.tls,"",@ -; SINGLE-NEXT: .p2align 2 -; SINGLE-NEXT: tls: -; SINGLE-NEXT: .int32 0 -@tls = internal thread_local global i32 0 +; CHECK-LABEL: ptr_to_tls +define i32* @ptr_to_tls() { + ; CHECK-DAG: global.get __tls_base + ; CHECK-DAG: i32.const tls + ; CHECK-NEXT: i32.add + ; CHECK-NEXT: return + ret i32* @tls +} + +; CHECK-LABEL: tls_load +define i32 @tls_load() { + ; CHECK-DAG: global.get __tls_base + ; CHECK-DAG: i32.const tls + ; CHECK-NEXT: i32.add + ; CHECK-NEXT: i32.load 0 + ; CHECK-NEXT: return + %tmp = load i32, i32* @tls, align 4 + ret i32 %tmp +} + +; CHECK-LABEL: tls_store +define void @tls_store(i32 %x) { + ; CHECK-DAG: global.get __tls_base + ; CHECK-DAG: i32.const tls + ; CHECK-NEXT: i32.add + ; CHECK-NEXT: i32.store 0 + ; CHECK-NEXT: return + store i32 %x, i32* @tls, align 4 + ret void +} + +; CHECK-LABEL: tls_size: +; CHECK-NEXT: .functype tls_size () -> (i32) +; CHECK-NEXT: global.get __tls_size +; CHECK-NEXT: return +declare i32 @llvm.wasm.tls.size.i32() +define i32 @tls_size() { + %1 = call i32 @llvm.wasm.tls.size.i32() + ret i32 %1 +} + +; CHECK: .type tls,@object +; CHECK-NEXT: .section .tbss.tls,"",@ +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: tls: +; CHECK-NEXT: .int32 0 +@tls = internal thread_local(localexec) global i32 0