Index: lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -369,6 +369,10 @@
       OutStreamer->AddBlankLine();
     }
     break;
+  case WebAssembly::COMPILER_FENCE:
+    // This is a compiler barrier that prevents instruction reordering during
+    // backend compilation, and should not be emitted.
+    break;
   case WebAssembly::EXTRACT_EXCEPTION_I32:
   case WebAssembly::EXTRACT_EXCEPTION_I32_S:
     // These are pseudo instructions that simulates popping values from stack.
Index: lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -77,14 +77,103 @@
     return;
   }

-  // Few custom selection stuff. If we need WebAssembly-specific selection,
-  // uncomment this block add corresponding case statements.
-  /*
+  // Few custom selection stuff.
+  SDLoc DL(Node);
+  MachineFunction &MF = CurDAG->getMachineFunction();
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_FENCE: {
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+      break;
+
+    uint64_t SyncScopeID =
+        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    switch (SyncScopeID) {
+    case SyncScope::SingleThread: {
+      // We lower a single-thread fence to a pseudo compiler barrier
+      // instruction that prevents instruction reordering. It will not be
+      // emitted in the final binary.
+      MachineSDNode *Fence =
+          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                 DL,                 // debug loc
+                                 MVT::Other,         // outchain type
+                                 Node->getOperand(0) // inchain
+          );
+      ReplaceNode(Node, Fence);
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+
+    case SyncScope::System: {
+      // For non-emscripten systems, we have not decided what we should
+      // translate fences to yet.
+      if (!Subtarget->getTargetTriple().isOSEmscripten())
+        report_fatal_error(
+            "ATOMIC_FENCE is not yet supported in non-emscripten OSes");
+
+      // Wasm does not have a fence instruction, but because all atomic
+      // instructions in wasm are sequentially consistent, we translate a
+      // fence to an idempotent atomic RMW instruction to a linear memory
+      // address. The RMW does not add ordering among wasm's atomics, which
+      // are already sequentially consistent, but it ensures a fence also
+      // prevents reordering of non-atomic instructions in the VM. Even
+      // though LLVM IR's fence does not say anything about its relationship
+      // with non-atomic instructions, we think this is more user-friendly.
+      //
+      // While any address can work, here we use a value stored in the
+      // __stack_pointer wasm global because there's a high chance that area
+      // is in cache.
+      //
+      // So the selected instructions will be in the form of:
+      //   %addr = get_global $__stack_pointer
+      //   %0 = i32.const 0
+      //   i32.atomic.rmw.or %addr, %0
+      SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
+          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
+      MachineSDNode *GetGlobal =
+          CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
+                                 DL,                          // debug loc
+                                 MVT::i32,                    // result type
+                                 StackPtrSym // __stack_pointer symbol
+          );
+
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      auto *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo::getUnknownStack(MF),
+          // FIXME Volatile isn't really correct, but currently all LLVM
+          // atomic instructions are treated as volatiles in the backend, so
+          // we should be consistent.
+          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+              MachineMemOperand::MOStore,
+          4, 4, AAMDNodes(), nullptr, SyncScope::System,
+          AtomicOrdering::SequentiallyConsistent);
+      MachineSDNode *Const0 =
+          CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
+      MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
+          WebAssembly::ATOMIC_RMW_OR_I32, // opcode
+          DL,                             // debug loc
+          MVT::i32,                       // result type
+          MVT::Other,                     // outchain type
+          {
+              Zero,                  // alignment
+              Zero,                  // offset
+              SDValue(GetGlobal, 0), // __stack_pointer
+              SDValue(Const0, 0),    // OR with 0 to make it idempotent
+              Node->getOperand(0)    // inchain
+          });
+
+      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
+      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    default:
+      llvm_unreachable("Unknown scope!");
+    }
+  }
+
   default:
     break;
   }
-  */

   // Select the default instruction.
   SelectCode(Node);
Index: lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
===================================================================
--- lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -887,3 +887,13 @@
                        ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
                        ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
                        ATOMIC_RMW32_U_CMPXCHG_I64>;
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
+} // Defs = [ARGUMENTS]
Index: test/CodeGen/WebAssembly/atomic-fence.ll
===================================================================
--- /dev/null
+++ test/CodeGen/WebAssembly/atomic-fence.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
+; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOSUPPORT
+; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOSUPPORT
+; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; NOSUPPORT: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
+
+; A multithread fence turns into 'global.get $__stack_pointer' followed by an
+; idempotent atomicrmw instruction.
+; CHECK-LABEL: multithread_fence:
+; CHECK:      global.get $push[[SP:[0-9]+]]=, __stack_pointer
+; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
+; CHECK-NEXT: i32.atomic.rmw.or $drop=, 0($pop[[SP]]), $pop[[ZERO]]
+; NOATOMIC-NOT: i32.atomic.rmw.or
+define void @multithread_fence() {
+  fence seq_cst
+  ret void
+}
+
+; Fences with weaker memory orderings than seq_cst should be treated the same,
+; because atomic memory accesses in wasm are sequentially consistent.
+; CHECK-LABEL: multithread_weak_fence:
+; CHECK:       global.get $push{{.+}}=, __stack_pointer
+; CHECK:       i32.atomic.rmw.or
+; CHECK:       i32.atomic.rmw.or
+; CHECK:       i32.atomic.rmw.or
+define void @multithread_weak_fence() {
+  fence acquire
+  fence release
+  fence acq_rel
+  ret void
+}
+
+; A singlethread fence becomes a compiler_fence pseudo instruction that acts
+; as a compiler barrier. The barrier should not be emitted to the .s file.
+; CHECK-LABEL: singlethread_fence:
+; CHECK-NOT: compiler_fence
+define void @singlethread_fence() {
+  fence syncscope("singlethread") seq_cst
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") release
+  fence syncscope("singlethread") acq_rel
+  ret void
+}
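
Note on the lowering, for reviewers: the sketch below shows what the change produces for a seq_cst fence on the emscripten triple. It is assembled from the test's CHECK lines above; the function name @fence_example and the exact $push/$pop register numbers are illustrative, not verbatim llc output.

  ; input IR (hypothetical example function)
  define void @fence_example() {
    fence seq_cst
    ret void
  }

  # approximate output with -mtriple=wasm32-unknown-emscripten -mattr=+atomics
  # -wasm-keep-registers (register numbering illustrative)
  fence_example:
    global.get        $push0=, __stack_pointer       # address likely to be in cache
    i32.const         $push1=, 0
    i32.atomic.rmw.or $drop=, 0($pop0), $pop1        # OR with 0 is idempotent; acts as the fence
    end_function

A syncscope("singlethread") fence, by contrast, selects only the COMPILER_FENCE pseudo, which the WebAssemblyAsmPrinter change skips, so no instruction is emitted for it in the .s file.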