diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -88,88 +88,36 @@
     uint64_t SyncScopeID =
         cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    MachineSDNode *Fence = nullptr;
     switch (SyncScopeID) {
-    case SyncScope::SingleThread: {
+    case SyncScope::SingleThread:
       // We lower a single-thread fence to a pseudo compiler barrier instruction
       // preventing instruction reordering. This will not be emitted in final
       // binary.
-      MachineSDNode *Fence =
-          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
-                                 DL,                 // debug loc
-                                 MVT::Other,         // outchain type
-                                 Node->getOperand(0) // inchain
-          );
-      ReplaceNode(Node, Fence);
-      CurDAG->RemoveDeadNode(Node);
-      return;
-    }
-
-    case SyncScope::System: {
-      // For non-emscripten systems, we have not decided on what we should
-      // traslate fences to yet.
-      if (!Subtarget->getTargetTriple().isOSEmscripten())
-        report_fatal_error(
-            "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
-
-      // Wasm does not have a fence instruction, but because all atomic
-      // instructions in wasm are sequentially consistent, we translate a
-      // fence to an idempotent atomic RMW instruction to a linear memory
-      // address. All atomic instructions in wasm are sequentially consistent,
-      // but this is to ensure a fence also prevents reordering of non-atomic
-      // instructions in the VM. Even though LLVM IR's fence instruction does
-      // not say anything about its relationship with non-atomic instructions,
-      // we think this is more user-friendly.
-      //
-      // While any address can work, here we use a value stored in
-      // __stack_pointer wasm global because there's high chance that area is
-      // in cache.
-      //
-      // So the selected instructions will be in the form of:
-      //   %addr = get_global $__stack_pointer
-      //   %0 = i32.const 0
-      //   i32.atomic.rmw.or %addr, %0
-      SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
-          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
-      MachineSDNode *GetGlobal =
-          CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
-                                 DL,                          // debug loc
-                                 MVT::i32,                    // result type
-                                 StackPtrSym // __stack_pointer symbol
-          );
-
-      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
-      auto *MMO = MF.getMachineMemOperand(
-          MachinePointerInfo::getUnknownStack(MF),
-          // FIXME Volatile isn't really correct, but currently all LLVM
-          // atomic instructions are treated as volatiles in the backend, so
-          // we should be consistent.
-          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
-              MachineMemOperand::MOStore,
-          4, 4, AAMDNodes(), nullptr, SyncScope::System,
-          AtomicOrdering::SequentiallyConsistent);
-      MachineSDNode *Const0 =
-          CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
-      MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
-          WebAssembly::ATOMIC_RMW_OR_I32, // opcode
-          DL,                             // debug loc
-          MVT::i32,                       // result type
-          MVT::Other,                     // outchain type
-          {
-              Zero,                  // alignment
-              Zero,                  // offset
-              SDValue(GetGlobal, 0), // __stack_pointer
-              SDValue(Const0, 0),    // OR with 0 to make it idempotent
-              Node->getOperand(0)    // inchain
-          });
-
-      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
-      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
-      CurDAG->RemoveDeadNode(Node);
-      return;
-    }
+      Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                     DL,                 // debug loc
+                                     MVT::Other,         // outchain type
+                                     Node->getOperand(0) // inchain
+      );
+      break;
+    case SyncScope::System:
+      // Currently wasm only supports sequentially consistent atomics, so we
+      // always set the order to 0 (sequentially consistent).
+      Fence = CurDAG->getMachineNode(
+          WebAssembly::ATOMIC_FENCE,
+          DL,                                         // debug loc
+          MVT::Other,                                 // outchain type
+          CurDAG->getTargetConstant(0, DL, MVT::i32), // order
+          Node->getOperand(0)                         // inchain
+      );
+      break;
     default:
       llvm_unreachable("Unknown scope!");
     }
+
+    ReplaceNode(Node, Fence);
+    CurDAG->RemoveDeadNode(Node);
+    return;
   }
 
   case ISD::GlobalTLSAddress: {
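Note (not part of the patch): a minimal IR sketch of the two paths the rewritten switch takes. The function names here are illustrative only; the expected output follows the CodeGen tests added later in this patch.

  ; A system-scope (multithread) fence takes the SyncScope::System path and is
  ; selected to ATOMIC_FENCE with order immediate 0 (sequentially consistent),
  ; which prints as 'atomic.fence'.
  define void @system_fence_sketch() {
    fence seq_cst                               ; -> atomic.fence
    ret void
  }

  ; A singlethread fence takes the SyncScope::SingleThread path and is selected
  ; to COMPILER_FENCE, a pseudo barrier that blocks reordering in the backend
  ; but emits nothing in the final binary.
  define void @singlethread_fence_sketch() {
    fence syncscope("singlethread") seq_cst     ; -> no instruction emitted
    ret void
  }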
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -126,6 +126,19 @@ def : WaitPatGlobalAddrOffOnly;
 } // Predicates = [HasAtomics]
 
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+let hasSideEffects = 1 in
+defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
+                               0x03>;
+} // Defs = [ARGUMENTS]
+
 //===----------------------------------------------------------------------===//
 // Atomic loads
 //===----------------------------------------------------------------------===//
@@ -887,13 +900,3 @@
   ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
   ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
   ATOMIC_RMW32_U_CMPXCHG_I64>;
-
-//===----------------------------------------------------------------------===//
-// Atomic fences
-//===----------------------------------------------------------------------===//
-
-// A compiler fence instruction that prevents reordering of instructions.
-let Defs = [ARGUMENTS] in {
-let isPseudo = 1, hasSideEffects = 1 in
-defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
-} // Defs = [ARGUMENTS]
diff --git a/llvm/test/CodeGen/WebAssembly/atomic-fence.ll b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
--- a/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
+++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.ll
@@ -1,19 +1,12 @@
 ; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
-; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
-; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
-; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"
 
-; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
-
-; A multithread fence turns into 'global.get $__stack_pointer' followed by an
-; idempotent atomicrmw instruction.
+; A multithread fence is lowered to an atomic.fence instruction.
 ; CHECK-LABEL: multithread_fence:
-; CHECK:      global.get $push[[SP:[0-9]+]]=, __stack_pointer
-; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
-; CHECK-NEXT: i32.atomic.rmw.or $drop=, 0($pop[[SP]]), $pop[[ZERO]]
+; CHECK: atomic.fence
 ; NOATOMIC-NOT: i32.atomic.rmw.or
 define void @multithread_fence() {
   fence seq_cst
@@ -23,10 +16,9 @@
 ; Fences with weaker memory orderings than seq_cst should be treated the same
 ; because atomic memory access in wasm are sequentially consistent.
 ; CHECK-LABEL: multithread_weak_fence:
-; CHECK: global.get $push{{.+}}=, __stack_pointer
-; CHECK: i32.atomic.rmw.or
-; CHECK: i32.atomic.rmw.or
-; CHECK: i32.atomic.rmw.or
+; CHECK:      atomic.fence
+; CHECK-NEXT: atomic.fence
+; CHECK-NEXT: atomic.fence
 define void @multithread_weak_fence() {
   fence acquire
   fence release
@@ -37,7 +29,8 @@
 ; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
 ; that acts as a compiler barrier. The barrier should not be emitted to .s file.
 ; CHECK-LABEL: singlethread_fence:
-; CHECK-NOT: compiler_fence
+; CHECK-NOT: compiler_fence
+; CHECK-NOT: atomic_fence
 define void @singlethread_fence() {
   fence syncscope("singlethread") seq_cst
   fence syncscope("singlethread") acquire
diff --git a/llvm/test/CodeGen/WebAssembly/atomic-fence.mir b/llvm/test/CodeGen/WebAssembly/atomic-fence.mir
new file
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/atomic-fence.mir
@@ -0,0 +1,68 @@
+# RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-reg-stackify -run-pass wasm-explicit-locals %s -o - | FileCheck %s
+
+# In the two tests below, without a compiler_fence or atomic.fence in between,
+# atomic.notify and i32.add would be reordered by the register stackify pass to
+# satisfy the operand order of 'call @foo'. Because there is a fence between
+# atomic.notify and i32.add, they cannot be reordered, and local.set and
+# local.get are inserted to save and reload atomic.notify's return value.
+
+--- |
+  target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+  target triple = "wasm32-unknown-unknown"
+
+  declare void @foo(i32, i32)
+  define void @compiler_fence_test(i32) {
+    ret void
+  }
+  define void @atomic_fence_test(i32) {
+    ret void
+  }
+...
+---
+# CHECK-LABEL: name: compiler_fence_test
+name: compiler_fence_test
+liveins:
+  - { reg: '$arguments' }
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
+    ; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
+    ; CHECK: COMPILER_FENCE
+    ; CHECK: ADD_I32
+    ; CHECK: LOCAL_GET_I32 [[LOCAL]]
+    ; CHECK: CALL_VOID @foo
+
+    liveins: $arguments
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
+    COMPILER_FENCE implicit-def $arguments
+    %2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
+    CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
+    RETURN_VOID implicit-def $arguments
+...
+
+---
+# CHECK-LABEL: name: atomic_fence_test
+name: atomic_fence_test
+liveins:
+  - { reg: '$arguments' }
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
+    ; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
+    ; CHECK: ATOMIC_FENCE
+    ; CHECK: ADD_I32
+    ; CHECK: LOCAL_GET_I32 [[LOCAL]]
+    ; CHECK: CALL_VOID @foo
+
+    liveins: $arguments
+    %0:i32 = CONST_I32 0, implicit-def $arguments
+    %1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
+    ATOMIC_FENCE 0, implicit-def $arguments
+    %2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
+    CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
+    RETURN_VOID implicit-def $arguments
+...
+
diff --git a/llvm/test/MC/WebAssembly/atomics-encodings.s b/llvm/test/MC/WebAssembly/atomics-encodings.s
--- a/llvm/test/MC/WebAssembly/atomics-encodings.s
+++ b/llvm/test/MC/WebAssembly/atomics-encodings.s
@@ -10,6 +10,9 @@
   # CHECK: i64.atomic.wait 0 # encoding: [0xfe,0x02,0x03,0x00]
   i64.atomic.wait 0
 
+  # CHECK: atomic.fence # encoding: [0xfe,0x03,0x00]
+  atomic.fence
+
   # CHECK: i32.atomic.load 0 # encoding: [0xfe,0x10,0x02,0x00]
   i32.atomic.load 0
   # CHECK: i64.atomic.load 4 # encoding: [0xfe,0x11,0x03,0x04]
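As a quick end-to-end check, here is a standalone sketch in the same style as the updated atomic-fence.ll test. It is not part of the patch; the RUN line mirrors the one added above and assumes an llc built with the WebAssembly target.

  ; RUN: llc < %s -mtriple=wasm32-unknown-unknown -asm-verbose=false \
  ; RUN:   -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics \
  ; RUN:   | FileCheck %s

  ; Weaker thread-visible orderings collapse to the same instruction, since
  ; wasm atomics are sequentially consistent (see multithread_weak_fence above).
  ; CHECK-LABEL: fence_orderings:
  ; CHECK:      atomic.fence
  ; CHECK-NEXT: atomic.fence
  define void @fence_orderings() {
    fence acquire
    fence seq_cst
    ret void
  }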