diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -29,6 +29,9 @@ TARGET_BUILTIN(__builtin_wasm_memory_init, "vIUiIUiv*UiUi", "", "bulk-memory") TARGET_BUILTIN(__builtin_wasm_data_drop, "vIUi", "", "bulk-memory") +// Thread-local storage +TARGET_BUILTIN(__builtin_wasm_tls_size, "z", "nc", "bulk-memory") + // Floating point min/max BUILTIN(__builtin_wasm_min_f32, "fff", "nc") BUILTIN(__builtin_wasm_max_f32, "fff", "nc") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13905,6 +13905,11 @@ Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop); return Builder.CreateCall(Callee, {Arg}); } + case WebAssembly::BI__builtin_wasm_tls_size: { + llvm::Type *ResultType = ConvertType(E->getType()); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); + return Builder.CreateCall(Callee); + } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -38,6 +38,12 @@ // WEBASSEMBLY64: call void @llvm.wasm.data.drop(i32 3) } +__SIZE_TYPE__ tls_size() { + return __builtin_wasm_tls_size(); + // WEBASSEMBLY32: call i32 @llvm.wasm.tls.size.i32() + // WEBASSEMBLY64: call i64 @llvm.wasm.tls.size.i64() +} + void throw(void *obj) { return __builtin_wasm_throw(0, obj); // WEBASSEMBLY32: call void @llvm.wasm.throw(i32 0, i8* %{{.*}}) diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -4,11 +4,11 @@ ; atomics => active segments (TODO: 
error) ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.o -o %t.atomics.wasm -; RUN: obj2yaml %t.atomics.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, active segments => active segments ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --active-segments %t.atomics.o -o %t.atomics.active.wasm -; RUN: obj2yaml %t.atomics.active.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.active.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, passive segments => error ; RUN: not wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --passive-segments %t.atomics.o -o %t.atomics.passive.wasm 2>&1 | FileCheck %s --check-prefix ERROR @@ -27,15 +27,15 @@ ; atomics, bulk memory => active segments (TODO: passive) ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.wasm -; RUN: obj2yaml %t.atomics.bulk-mem.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.bulk-mem.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, bulk memory, active segments => active segments ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --active-segments %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.active.wasm -; RUN: obj2yaml %t.atomics.bulk-mem.active.wasm | FileCheck %s --check-prefix ACTIVE +; RUN: obj2yaml %t.atomics.bulk-mem.active.wasm | FileCheck %s --check-prefixes ACTIVE,ACTIVE-TLS ; atomics, bulk memory, passive segments => passive segments ; RUN: wasm-ld -no-gc-sections --no-entry --shared-memory --max-memory=131072 --passive-segments %t.atomics.bulk-mem.o -o %t.atomics.bulk-mem.passive.wasm -; RUN: obj2yaml %t.atomics.bulk-mem.passive.wasm | FileCheck %s --check-prefix PASSIVE +; RUN: obj2yaml %t.atomics.bulk-mem.passive.wasm | FileCheck %s --check-prefixes 
PASSIVE,PASSIVE-TLS target triple = "wasm32-unknown-unknown" @@ -54,6 +54,9 @@ ; ACTIVE-NEXT: - Index: 0 ; ACTIVE-NEXT: Locals: [] ; ACTIVE-NEXT: Body: 0B +; ACTIVE-TLS-NEXT: - Index: 1 +; ACTIVE-TLS-NEXT: Locals: [] +; ACTIVE-TLS-NEXT: Body: 0B ; ACTIVE-NEXT: - Type: DATA ; ACTIVE-NEXT: Segments: ; ACTIVE-NEXT: - SectionOffset: 7 @@ -80,6 +83,8 @@ ; ACTIVE-NEXT: FunctionNames: ; ACTIVE-NEXT: - Index: 0 ; ACTIVE-NEXT: Name: __wasm_call_ctors +; ACTIVE-TLS-NEXT: - Index: 1 +; ACTIVE-TLS-NEXT: Name: __wasm_init_tls ; PASSIVE-LABEL: - Type: CODE ; PASSIVE-NEXT: Functions: @@ -89,6 +94,9 @@ ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Locals: [] ; PASSIVE-NEXT: Body: 41800841004114FC080000FC090041940841004190CE00FC080100FC090141A4D6004100410DFC080200FC09020B +; PASSIVE-TLS-NEXT: - Index: 2 +; PASSIVE-TLS-NEXT: Locals: [] +; PASSIVE-TLS-NEXT: Body: 0B ; PASSIVE-NEXT: - Type: DATA ; PASSIVE-NEXT: Segments: ; PASSIVE-NEXT: - SectionOffset: 3 @@ -108,3 +116,5 @@ ; PASSIVE-NEXT: Name: __wasm_call_ctors ; PASSIVE-NEXT: - Index: 1 ; PASSIVE-NEXT: Name: __wasm_init_memory +; PASSIVE-TLS-NEXT: - Index: 2 +; PASSIVE-TLS-NEXT: Name: __wasm_init_tls diff --git a/lld/test/wasm/tls.ll b/lld/test/wasm/tls.ll new file mode 100644 --- /dev/null +++ b/lld/test/wasm/tls.ll @@ -0,0 +1,81 @@ +; RUN: llc -mattr=+atomics,+bulk-memory -filetype=obj %s -o %t.o + +target triple = "wasm32-unknown-unknown" + +@tls1 = thread_local(localexec) global i32 1, align 4 +@no_tls = global i32 0, align 4 +@tls2 = thread_local(localexec) global i32 1, align 4 + +define i32* @tls1_addr() { + ret i32* @tls1 +} + +define i32* @tls2_addr() { + ret i32* @tls2 +} + +; RUN: wasm-ld -no-gc-sections --shared-memory --max-memory=131072 --no-entry -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +; RUN: wasm-ld -no-gc-sections --shared-memory --max-memory=131072 --no-merge-data-segments --no-entry -o %t.wasm %t.o +; RUN: obj2yaml %t.wasm | FileCheck %s + +; CHECK: - Type: GLOBAL +; CHECK-NEXT: Globals: +; 
CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 66576 +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: true +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 0 +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Type: I32 +; CHECK-NEXT: Mutable: false +; CHECK-NEXT: InitExpr: +; CHECK-NEXT: Opcode: I32_CONST +; CHECK-NEXT: Value: 8 + + +; CHECK: - Type: CODE +; CHECK-NEXT: Functions: +; CHECK-NEXT: - Index: 0 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 0B +; CHECK-NEXT: - Index: 1 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 20002401200041004108FC0800000B + +; Expected body of __wasm_init_tls: +; local.get 0 +; global.set 1 +; local.get 0 +; i32.const 0 +; i32.const 8 +; memory.init 0, 0 +; end + +; CHECK-NEXT: - Index: 2 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 2381808080004180808080006A0B + +; Expected body of tls1_addr: +; global.get 1 +; i32.const 0 +; i32.add +; end + +; CHECK-NEXT: - Index: 3 +; CHECK-NEXT: Locals: [] +; CHECK-NEXT: Body: 2381808080004184808080006A0B + +; Expected body of tls2_addr: +; global.get 1 +; i32.const 4 +; i32.add +; end diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -454,6 +454,7 @@ // Create ABI-defined synthetic symbols static void createSyntheticSymbols() { static WasmSignature nullSignature = {{}, {}}; + static WasmSignature i32ArgSignature = {{}, {ValType::I32}}; static llvm::wasm::WasmGlobalType globalTypeI32 = {WASM_TYPE_I32, false}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI32 = {WASM_TYPE_I32, true}; @@ -516,6 +517,30 @@ WasmSym::heapBase = symtab->addOptionalDataSymbol("__heap_base"); } + if (config->sharedMemory && !config->shared) { + llvm::wasm::WasmGlobal tlsBaseGlobal; + tlsBaseGlobal.Type = {WASM_TYPE_I32, true}; + tlsBaseGlobal.InitExpr.Value.Int32 = 0; + 
tlsBaseGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + tlsBaseGlobal.SymbolName = "__tls_base"; + WasmSym::tlsBase = + symtab->addSyntheticGlobal("__tls_base", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(tlsBaseGlobal, nullptr)); + + llvm::wasm::WasmGlobal tlsSizeGlobal; + tlsSizeGlobal.Type = {WASM_TYPE_I32, false}; + tlsSizeGlobal.InitExpr.Value.Int32 = 0; + tlsSizeGlobal.InitExpr.Opcode = WASM_OPCODE_I32_CONST; + tlsSizeGlobal.SymbolName = "__tls_size"; + WasmSym::tlsSize = + symtab->addSyntheticGlobal("__tls_size", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(tlsSizeGlobal, nullptr)); + + WasmSym::initTLS = symtab->addSyntheticFunction( + "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, + make(i32ArgSignature, "__wasm_init_tls")); + } + WasmSym::dsoHandle = symtab->addSyntheticDataSymbol( "__dso_handle", WASM_SYMBOL_VISIBILITY_HIDDEN); } diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h --- a/lld/wasm/Symbols.h +++ b/lld/wasm/Symbols.h @@ -426,6 +426,15 @@ // linear memory. static GlobalSymbol *stackPointer; + // __tls_base + // Global that holds the address of the base of the current thread's + // TLS block. + static GlobalSymbol *tlsBase; + + // __tls_size + // Symbol whose value is the size of the TLS block. + static GlobalSymbol *tlsSize; + // __data_end // Symbol marking the end of the data and bss. static DefinedData *dataEnd; @@ -448,6 +457,10 @@ // Function that applies relocations to data segment post-instantiation. static DefinedFunction *applyRelocs; + // __wasm_init_tls + // Function that allocates thread-local storage and initializes it. 
+ static DefinedFunction *initTLS; + // __dso_handle // Symbol used in calls to __cxa_atexit to determine current DLL static DefinedData *dsoHandle; diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp --- a/lld/wasm/Symbols.cpp +++ b/lld/wasm/Symbols.cpp @@ -27,11 +27,14 @@ DefinedFunction *WasmSym::callCtors; DefinedFunction *WasmSym::initMemory; DefinedFunction *WasmSym::applyRelocs; +DefinedFunction *WasmSym::initTLS; DefinedData *WasmSym::dsoHandle; DefinedData *WasmSym::dataEnd; DefinedData *WasmSym::globalBase; DefinedData *WasmSym::heapBase; GlobalSymbol *WasmSym::stackPointer; +GlobalSymbol *WasmSym::tlsBase; +GlobalSymbol *WasmSym::tlsSize; UndefinedGlobal *WasmSym::tableBase; UndefinedGlobal *WasmSym::memoryBase; @@ -200,8 +203,14 @@ uint32_t DefinedData::getVirtualAddress() const { LLVM_DEBUG(dbgs() << "getVirtualAddress: " << getName() << "\n"); - if (segment) + if (segment) { + // For thread local data, the symbol location is relative to the start of + // the .tdata section, since they are used as offsets from __tls_base. + // Hence, we do not add in segment->outputSeg->startVA. + if (segment->outputSeg->name == ".tdata") + return segment->outputSegmentOffset + offset; return segment->outputSeg->startVA + segment->outputSegmentOffset + offset; + } return offset; } diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -57,6 +57,7 @@ void createInitMemoryFunction(); void createApplyRelocationsFunction(); void createCallCtorsFunction(); + void createInitTLSFunction(); void assignIndexes(); void populateSymtab(); @@ -242,6 +243,11 @@ log(formatv("mem: {0,-15} offset={1,-8} size={2,-8} align={3}", seg->name, memoryPtr, seg->size, seg->alignment)); memoryPtr += seg->size; + + if (WasmSym::tlsSize && seg->name == ".tdata") { + auto *tlsSize = cast(WasmSym::tlsSize); + tlsSize->global->global.InitExpr.Value.Int32 = seg->size; + } } // TODO: Add .bss space here. 
@@ -353,6 +359,7 @@ StringMap used; StringMap required; StringMap disallowed; + bool tlsUsed = false; // Only infer used features if user did not specify features bool inferFeatures = !config->features.hasValue(); @@ -385,6 +392,14 @@ std::to_string(feature.Prefix)); } } + + for (InputSegment *segment : file->segments) { + if (!segment->live) + continue; + StringRef name = segment->getName(); + if (name.startswith(".tdata.") || name.startswith(".tbss.")) + tlsUsed = true; + } } if (inferFeatures) @@ -411,6 +426,10 @@ error("'bulk-memory' feature must be used in order to emit passive " "segments"); + if (!used.count("bulk-memory") && tlsUsed) + error("'bulk-memory' feature must be used in order to use thread-local " + "storage"); + // Validate that used features are allowed in output if (!inferFeatures) { for (auto &feature : used.keys()) { @@ -492,8 +511,8 @@ // implement in all major browsers. // See: https://github.com/WebAssembly/mutable-global if (g->getGlobalType()->Mutable) { - // Only the __stack_pointer should ever be create as mutable. - assert(g == WasmSym::stackPointer); + // Only __stack_pointer and __tls_base should ever be created as mutable. + assert(g == WasmSym::stackPointer || g == WasmSym::tlsBase); continue; } export_ = {name, WASM_EXTERNAL_GLOBAL, g->getGlobalIndex()}; @@ -602,6 +621,11 @@ // we only have a single __memory_base to use as our base address. if (config->isPic) return ".data"; + // We only support one thread-local segment, so we must merge the segments + // despite --no-merge-data-segments. + // We also need to merge .tbss into .tdata so they share the same offsets. 
+ if (name.startswith(".tdata.") || name.startswith(".tbss.")) + return ".tdata"; if (!config->mergeDataSegments) return name; if (name.startswith(".text.")) @@ -625,7 +649,7 @@ if (s == nullptr) { LLVM_DEBUG(dbgs() << "new segment: " << name << "\n"); s = make(name, segments.size()); - if (config->passiveSegments) + if (config->passiveSegments || name == ".tdata") s->initFlags = WASM_SEGMENT_IS_PASSIVE; segments.push_back(s); } @@ -655,7 +679,7 @@ // initialize passive data segments for (const OutputSegment *s : segments) { - if (s->initFlags & WASM_SEGMENT_IS_PASSIVE) { + if (s->initFlags & WASM_SEGMENT_IS_PASSIVE && s->name != ".tdata") { // destination address writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); writeSleb128(os, s->startVA, "destination address"); @@ -737,6 +761,55 @@ createFunction(WasmSym::callCtors, bodyContent); } +// Synthesize the body of __wasm_init_tls. The function stores its single i32 +// argument into __tls_base and then copies the passive .tdata segment to that +// address with memory.init; the body is just `end` when no .tdata segment exists. +void Writer::createInitTLSFunction() { + if (!WasmSym::initTLS->isLive()) + return; + + std::string bodyContent; + { + raw_string_ostream os(bodyContent); + + // .tdata need not be the first output segment, so scan all of them and + // stop only once it has been found. + OutputSegment *tlsSeg = nullptr; + for (auto *seg : segments) { + if (seg->name == ".tdata") { + tlsSeg = seg; + break; + } + } + + writeUleb128(os, 0, "num locals"); + if (tlsSeg) { + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + + writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); + writeUleb128(os, WasmSym::tlsBase->getGlobalIndex(), "global index"); + + writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); + writeUleb128(os, 0, "local index"); + + writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); + writeSleb128(os, 0, "segment offset"); + + writeU8(os, WASM_OPCODE_I32_CONST, "i32.const"); + writeSleb128(os, tlsSeg->size, "memory region size"); + + writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); + writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "MEMORY.INIT"); + writeUleb128(os, tlsSeg->index, "segment index immediate"); + writeU8(os, 0, "memory index immediate"); + } + writeU8(os, WASM_OPCODE_END, "end function"); + } + + 
createFunction(WasmSym::initTLS, bodyContent); +} + // Populate InitFunctions vector with init functions from all input objects. // This is then used either when creating the output linking section or to // synthesize the "__wasm_call_ctors" function. @@ -829,6 +902,12 @@ createCallCtorsFunction(); } + if (config->sharedMemory && !config->shared) + createInitTLSFunction(); + + if (errorCount()) + return; + log("-- calculateTypes"); calculateTypes(); log("-- calculateExports"); diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -242,7 +242,9 @@ enum : unsigned { WASM_OPCODE_END = 0x0b, WASM_OPCODE_CALL = 0x10, + WASM_OPCODE_LOCAL_GET = 0x20, WASM_OPCODE_GLOBAL_GET = 0x23, + WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, WASM_OPCODE_I32_CONST = 0x41, WASM_OPCODE_I64_CONST = 0x42, diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -124,4 +124,13 @@ [llvm_i32_ty], [IntrNoDuplicate, IntrHasSideEffects, ImmArg<0>]>; +//===----------------------------------------------------------------------===// +// Thread-local storage intrinsics +//===----------------------------------------------------------------------===// + +def int_wasm_tls_size : + Intrinsic<[llvm_anyint_ty], + [], + [IntrNoMem, IntrSpeculatable]>; + } // TargetPrefix = "wasm" diff --git a/llvm/include/llvm/MC/MCSectionWasm.h b/llvm/include/llvm/MC/MCSectionWasm.h --- a/llvm/include/llvm/MC/MCSectionWasm.h +++ b/llvm/include/llvm/MC/MCSectionWasm.h @@ -66,7 +66,8 @@ bool isVirtualSection() const override; bool isWasmData() const { - return Kind.isGlobalWriteableData() || Kind.isReadOnly(); + return Kind.isGlobalWriteableData() || Kind.isReadOnly() || + Kind.isThreadLocal(); } bool isUnique() const { return 
UniqueID != ~0U; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -233,6 +233,8 @@ return false; if (Addr.getGlobalValue()) return false; + if (GV->isThreadLocal()) + return false; Addr.setGlobalValue(GV); return true; } @@ -614,6 +616,8 @@ if (const GlobalValue *GV = dyn_cast(C)) { if (TLI.isPositionIndependent()) return 0; + if (GV->isThreadLocal()) + return 0; unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ? &WebAssembly::I64RegClass : &WebAssembly::I32RegClass); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "WebAssembly.h" #include "WebAssemblyTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" // To access function attributes. 
#include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" @@ -171,6 +172,54 @@ } } + case ISD::GlobalTLSAddress: { + const auto *GA = cast(Node); + + if (!MF.getSubtarget().hasBulkMemory()) + report_fatal_error("cannot use thread-local storage without bulk memory", + false); + + if (GA->getGlobal()->getThreadLocalMode() != + GlobalValue::LocalExecTLSModel) { + report_fatal_error("only -ftls-model=local-exec is supported for now", + false); + } + + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + SDValue TLSBaseSym = CurDAG->getTargetExternalSymbol("__tls_base", PtrVT); + SDValue TLSOffsetSym = CurDAG->getTargetGlobalAddress( + GA->getGlobal(), DL, PtrVT, GA->getOffset(), 0); + + MachineSDNode *TLSBase = CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, + DL, MVT::i32, TLSBaseSym); + MachineSDNode *TLSOffset = CurDAG->getMachineNode( + WebAssembly::CONST_I32, DL, MVT::i32, TLSOffsetSym); + MachineSDNode *TLSAddress = + CurDAG->getMachineNode(WebAssembly::ADD_I32, DL, MVT::i32, + SDValue(TLSBase, 0), SDValue(TLSOffset, 0)); + ReplaceNode(Node, TLSAddress); + return; + } + + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); + switch (IntNo) { + case Intrinsic::wasm_tls_size: { + MVT PtrVT = TLI->getPointerTy(CurDAG->getDataLayout()); + assert(PtrVT == MVT::i32 && "only wasm32 is supported for now"); + + MachineSDNode *TLSSize = CurDAG->getMachineNode( + WebAssembly::GLOBAL_GET_I32, DL, PtrVT, + CurDAG->getTargetExternalSymbol("__tls_size", MVT::i32)); + ReplaceNode(Node, TLSSize); + return; + } + } + break; + } + default: break; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -77,9 +77,11 @@ // functions. 
It's OK to hardcode knowledge of specific symbols here; this // method is precisely there for fetching the signatures of known // Clang-provided symbols. - if (strcmp(Name, "__stack_pointer") == 0 || - strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0) { - bool Mutable = strcmp(Name, "__stack_pointer") == 0; + if (strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0 || + strcmp(Name, "__memory_base") == 0 || strcmp(Name, "__table_base") == 0 || + strcmp(Name, "__tls_size") == 0) { + bool Mutable = + strcmp(Name, "__stack_pointer") == 0 || strcmp(Name, "__tls_base") == 0; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ uint8_t(Subtarget.hasAddr64() ? wasm::WASM_TYPE_I64 diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -187,7 +187,8 @@ replaceFeatures(F, FeatureStr); bool Stripped = false; - if (!Features[WebAssembly::FeatureAtomics]) { + if (!Features[WebAssembly::FeatureAtomics] || + !Features[WebAssembly::FeatureBulkMemory]) { Stripped |= stripAtomics(M); Stripped |= stripThreadLocals(M); } @@ -271,7 +272,8 @@ // "atomics" is special: code compiled without atomics may have had its // atomics lowered to nonatomic operations. In that case, atomics is // disallowed to prevent unsafe linking with atomics-enabled objects. 
- assert(!Features[WebAssembly::FeatureAtomics]); + assert(!Features[WebAssembly::FeatureAtomics] || + !Features[WebAssembly::FeatureBulkMemory]); M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, wasm::WASM_FEATURE_PREFIX_DISALLOWED); } else if (Features[KV.Value]) { diff --git a/llvm/test/CodeGen/WebAssembly/atomic-fence.ll b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll --- a/llvm/test/CodeGen/WebAssembly/atomic-fence.ll +++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC -; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN -; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN -; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s +; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext,+bulk-memory 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN +; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext,+bulk-memory 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN +; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext,+bulk-memory | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll b/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll --- a/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll +++ b/llvm/test/CodeGen/WebAssembly/atomic-mem-consistency.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -; RUN: llc < %s -asm-verbose=false 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext,+bulk-memory | FileCheck %s ; Currently all wasm atomic memory access instructions are sequentially ; consistent, so even if LLVM IR specifies weaker orderings than that, we diff --git a/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll b/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll --- a/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll +++ b/llvm/test/CodeGen/WebAssembly/atomic-rmw.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-keep-registers -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext,+bulk-memory | FileCheck %s ; Test atomic RMW (read-modify-write) instructions are assembled properly. diff --git a/llvm/test/CodeGen/WebAssembly/i32-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/i32-load-store-alignment.ll --- a/llvm/test/CodeGen/WebAssembly/i32-load-store-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/i32-load-store-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -mattr=+atomics,+bulk-memory -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test loads and stores with custom alignment values. 
diff --git a/llvm/test/CodeGen/WebAssembly/i64-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/i64-load-store-alignment.ll --- a/llvm/test/CodeGen/WebAssembly/i64-load-store-alignment.ll +++ b/llvm/test/CodeGen/WebAssembly/i64-load-store-alignment.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+atomics -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -mattr=+atomics,+bulk-memory -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test loads and stores with custom alignment values. diff --git a/llvm/test/CodeGen/WebAssembly/load-ext-atomic.ll b/llvm/test/CodeGen/WebAssembly/load-ext-atomic.ll --- a/llvm/test/CodeGen/WebAssembly/load-ext-atomic.ll +++ b/llvm/test/CodeGen/WebAssembly/load-ext-atomic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+atomics,+sign-ext -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -mattr=+atomics,+sign-ext,+bulk-memory -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test that extending loads are assembled properly. diff --git a/llvm/test/CodeGen/WebAssembly/offset-atomics.ll b/llvm/test/CodeGen/WebAssembly/offset-atomics.ll --- a/llvm/test/CodeGen/WebAssembly/offset-atomics.ll +++ b/llvm/test/CodeGen/WebAssembly/offset-atomics.ll @@ -1,5 +1,5 @@ ; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext,+bulk-memory | FileCheck %s ; Test that atomic loads are assembled properly. 
diff --git a/llvm/test/CodeGen/WebAssembly/store-trunc-atomic.ll b/llvm/test/CodeGen/WebAssembly/store-trunc-atomic.ll --- a/llvm/test/CodeGen/WebAssembly/store-trunc-atomic.ll +++ b/llvm/test/CodeGen/WebAssembly/store-trunc-atomic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mattr=+atomics,+sign-ext -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s +; RUN: llc < %s -mattr=+atomics,+sign-ext,+bulk-memory -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s ; Test that truncating stores are assembled properly. diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mattr=-atomics | FileCheck %s --check-prefixes CHECK,NO-ATOMICS -; RUN: llc < %s -mattr=+atomics | FileCheck %s --check-prefixes CHECK,ATOMICS +; RUN: llc < %s -mattr=-atomics,-bulk-memory | FileCheck %s --check-prefixes NO-BULK-MEM +; RUN: llc < %s -mattr=+atomics,+bulk-memory | FileCheck %s --check-prefixes BULK-MEM ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. 
@@ -9,18 +9,22 @@ @foo = internal thread_local global i32 0 -; CHECK-LABEL: .custom_section.target_features,"",@ -; -atomics -; NO-ATOMICS-NEXT: .int8 1 -; NO-ATOMICS-NEXT: .int8 45 -; NO-ATOMICS-NEXT: .int8 7 -; NO-ATOMICS-NEXT: .ascii "atomics" -; NO-ATOMICS-NEXT: .bss.foo,"",@ +; -atomics,-bulk-memory +; NO-BULK-MEM-LABEL: .custom_section.target_features,"",@ +; NO-BULK-MEM-NEXT: .int8 1 +; NO-BULK-MEM-NEXT: .int8 45 +; NO-BULK-MEM-NEXT: .int8 7 +; NO-BULK-MEM-NEXT: .ascii "atomics" +; NO-BULK-MEM-NEXT: .bss.foo,"",@ -; +atomics -; ATOMICS-NEXT: .int8 1 -; ATOMICS-NEXT: .int8 43 -; ATOMICS-NEXT: .int8 7 -; ATOMICS-NEXT: .ascii "atomics" -; ATOMICS-NEXT: .tbss.foo,"",@ +; +atomics,+bulk-memory +; BULK-MEM-LABEL: .custom_section.target_features,"",@ +; BULK-MEM-NEXT: .int8 2 +; BULK-MEM-NEXT: .int8 43 +; BULK-MEM-NEXT: .int8 7 +; BULK-MEM-NEXT: .ascii "atomics" +; BULK-MEM-NEXT: .int8 43 +; BULK-MEM-NEXT: .int8 11 +; BULK-MEM-NEXT: .ascii "bulk-memory" +; BULK-MEM-NEXT: .tbss.foo,"",@ diff --git a/llvm/test/CodeGen/WebAssembly/target-features.ll b/llvm/test/CodeGen/WebAssembly/target-features.ll --- a/llvm/test/CodeGen/WebAssembly/target-features.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features.ll @@ -23,7 +23,7 @@ ret void } -attributes #0 = { "target-features"="+atomics" } +attributes #0 = { "target-features"="+atomics,+bulk-memory" } attributes #1 = { "target-features"="+nontrapping-fptoint" } @@ -56,12 +56,15 @@ ; CHECK-LABEL: .custom_section.target_features,"",@ -; +atomics, +nontrapping-fptoint -; ATTRS-NEXT: .int8 2 +; +atomics, +bulk-memory, +nontrapping-fptoint +; ATTRS-NEXT: .int8 3 ; ATTRS-NEXT: .int8 43 ; ATTRS-NEXT: .int8 7 ; ATTRS-NEXT: .ascii "atomics" ; ATTRS-NEXT: .int8 43 +; ATTRS-NEXT: .int8 11 +; ATTRS-NEXT: .ascii "bulk-memory" +; ATTRS-NEXT: .int8 43 ; ATTRS-NEXT: .int8 19 ; ATTRS-NEXT: .ascii "nontrapping-fptoint" @@ -75,11 +78,14 @@ ; SIMD128-NEXT: .ascii "simd128" ; +atomics, +nontrapping-fptoint, +sign-ext, +simd128 -; 
BLEEDING-EDGE-NEXT: .int8 5 +; BLEEDING-EDGE-NEXT: .int8 6 ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 7 ; BLEEDING-EDGE-NEXT: .ascii "atomics" ; BLEEDING-EDGE-NEXT: .int8 43 +; BLEEDING-EDGE-NEXT: .int8 11 +; BLEEDING-EDGE-NEXT: .ascii "bulk-memory" +; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 15 ; BLEEDING-EDGE-NEXT: .ascii "mutable-globals" ; BLEEDING-EDGE-NEXT: .int8 43 diff --git a/llvm/test/CodeGen/WebAssembly/tls.ll b/llvm/test/CodeGen/WebAssembly/tls.ll --- a/llvm/test/CodeGen/WebAssembly/tls.ll +++ b/llvm/test/CodeGen/WebAssembly/tls.ll @@ -1,17 +1,82 @@ -; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck --check-prefix=SINGLE %s +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+atomics,+bulk-memory | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+atomics,+bulk-memory -fast-isel | FileCheck %s --check-prefixes=CHECK,TLS +; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+atomics,-bulk-memory | FileCheck %s --check-prefixes=CHECK,NO-TLS target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; SINGLE-LABEL: address_of_tls: +; CHECK-LABEL: address_of_tls: +; CHECK-NEXT: .functype address_of_tls () -> (i32) define i32 @address_of_tls() { - ; SINGLE: i32.const $push0=, tls - ; SINGLE-NEXT: return $pop0 + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const tls + ; NO-TLS-NEXT: return ret i32 ptrtoint(i32* @tls to i32) } -; SINGLE: .type tls,@object -; SINGLE-NEXT: .section .bss.tls,"",@ -; SINGLE-NEXT: .p2align 2 -; SINGLE-NEXT: tls: -; SINGLE-NEXT: .int32 0 -@tls = internal thread_local global i32 0 +; 
CHECK-LABEL: ptr_to_tls: +; CHECK-NEXT: .functype ptr_to_tls () -> (i32) +define i32* @ptr_to_tls() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const tls + ; NO-TLS-NEXT: return + ret i32* @tls +} + +; CHECK-LABEL: tls_load: +; CHECK-NEXT: .functype tls_load () -> (i32) +define i32 @tls_load() { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: i32.load 0 + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const 0 + ; NO-TLS-NEXT: i32.load tls + ; NO-TLS-NEXT: return + %tmp = load i32, i32* @tls, align 4 + ret i32 %tmp +} + +; CHECK-LABEL: tls_store: +; CHECK-NEXT: .functype tls_store (i32) -> () +define void @tls_store(i32 %x) { + ; TLS-DAG: global.get __tls_base + ; TLS-DAG: i32.const tls + ; TLS-NEXT: i32.add + ; TLS-NEXT: i32.store 0 + ; TLS-NEXT: return + + ; NO-TLS-NEXT: i32.const 0 + ; NO-TLS-NEXT: i32.store tls + ; NO-TLS-NEXT: return + store i32 %x, i32* @tls, align 4 + ret void +} + +; CHECK-LABEL: tls_size: +; CHECK-NEXT: .functype tls_size () -> (i32) +define i32 @tls_size() { +; CHECK-NEXT: global.get __tls_size +; CHECK-NEXT: return + %1 = call i32 @llvm.wasm.tls.size.i32() + ret i32 %1 +} + +; CHECK: .type tls,@object +; TLS-NEXT: .section .tbss.tls,"",@ +; NO-TLS-NEXT: .section .bss.tls,"",@ +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: tls: +; CHECK-NEXT: .int32 0 +@tls = internal thread_local(localexec) global i32 0 + +declare i32 @llvm.wasm.tls.size.i32()