Index: lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -211,6 +211,10 @@
       OutStreamer->AddBlankLine();
     }
     break;
+  case WebAssembly::COMPILER_FENCE:
+    // This is a compiler barrier that prevents instruction reordering during
+    // backend compilation, and should not be emitted.
+    break;
   default: {
     WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
     MCInst TmpInst;
Index: lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
===================================================================
--- lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -75,14 +75,94 @@
     return;
   }
 
-  // Few custom selection stuff. If we need WebAssembly-specific selection,
-  // uncomment this block add corresponding case statements.
-  /*
+  // Few custom selection stuff.
+  SDLoc DL(Node);
+  MachineFunction &MF = CurDAG->getMachineFunction();
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_FENCE: {
+    if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
+      break;
+
+    uint64_t SyncScopeID =
+        cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+    switch (SyncScopeID) {
+    case SyncScope::SingleThread: {
+      // We lower a single-thread fence to a pseudo compiler barrier instruction
+      // preventing instruction reordering. This will not be emitted in final
+      // binary.
+      MachineSDNode *Fence =
+          CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
+                                 DL,                 // debug loc
+                                 MVT::Other,         // outchain type
+                                 Node->getOperand(0) // inchain
+          );
+      ReplaceNode(Node, Fence);
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+
+    case SyncScope::System: {
+      // Wasm does not have a fence instruction, but because all atomic
+      // instructions in wasm are sequentially consistent, we translate a fence
+      // to an idempotent atomic RMW instruction to a linear memory address.
+      // All atomic instructions in wasm are sequentially consistent, but this
+      // is to ensure a fence also prevents reordering of non-atomic
+      // instructions in the VM. Even though LLVM IR's fence instruction does
+      // not say anything about its relationship with non-atomic instructions,
+      // we think this is more user-friendly.
+      //
+      // While any address can work, here we use a value stored in
+      // __stack_pointer wasm global because there's high chance that area is in
+      // cache.
+      //
+      // So the selected instructions will be in the form of:
+      //   %addr = get_global $__stack_pointer
+      //   i32.atomic.rmw.or %addr, 0
+      SDValue StackPtrSym = CurDAG->getExternalSymbol(
+          "__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
+      MachineSDNode *GetGlobal =
+          CurDAG->getMachineNode(WebAssembly::GET_GLOBAL_I32, // opcode
+                                 DL,                          // debug loc
+                                 MVT::i32,                    // result type
+                                 StackPtrSym // __stack_pointer symbol
+          );
+
+      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      auto *MMO = MF.getMachineMemOperand(
+          MachinePointerInfo::getUnknownStack(MF),
+          // FIXME Volatile isn't really correct, but currently all LLVM atomic
+          // instructions are treated as volatiles in the backend, so we should
+          // be consistent.
+          MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
+              MachineMemOperand::MOStore,
+          4, 4, AAMDNodes(), nullptr, SyncScope::System,
+          AtomicOrdering::SequentiallyConsistent);
+      MachineSDNode *AtomicRMW =
+          CurDAG->getMachineNode(WebAssembly::ATOMIC_RMW_OR_I32, // opcode
+                                 DL,         // debug loc
+                                 MVT::i32,   // result type
+                                 MVT::Other, // outchain type
+                                 {
+                                     Zero,                  // alignment
+                                     Zero,                  // offset
+                                     SDValue(GetGlobal, 0), // __stack_pointer
+                                     Zero, // OR with 0 to make it idempotent
+                                     Node->getOperand(0)    // inchain
+                                 });
+
+      CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
+      ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    default:
+      llvm_unreachable("Unknown scope!");
+    }
+  }
+
   default:
     break;
   }
-  */
 
   // Select the default instruction.
   SelectCode(Node);
Index: lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
===================================================================
--- lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -1025,3 +1025,13 @@
 def : WaitPatExternSymOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
 def : WaitPatExternSymOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
 } // Predicates = [HasAtomics]
+
+//===----------------------------------------------------------------------===//
+// Atomic fences
+//===----------------------------------------------------------------------===//
+
+// A compiler fence instruction that prevents reordering of instructions.
+let Defs = [ARGUMENTS] in {
+let isPseudo = 1, hasSideEffects = 1 in
+defm COMPILER_FENCE : NRI<(outs), (ins), [], "compiler_fence">;
+} // Defs = [ARGUMENTS]
Index: test/CodeGen/WebAssembly/atomic-fence.ll
===================================================================
--- /dev/null
+++ test/CodeGen/WebAssembly/atomic-fence.ll
@@ -0,0 +1,41 @@
+; RUN: not llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
+; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -disable-wasm-explicit-locals -mattr=+atomics,+sign-ext | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown-unknown"
+
+; A multithread fence turns into 'get_global $__stack_pointer' followed by an
+; idempotent atomicrmw instruction.
+; CHECK-LABEL: multithread_fence:
+; CHECK:      get_global $push0=, __stack_pointer
+; CHECK-NEXT: i32.atomic.rmw.or $drop=, 0($pop0), 0
+define void @multithread_fence() {
+  fence seq_cst
+  ret void
+}
+
+; Fences with weaker memory orderings than seq_cst should be treated the same
+; because atomic memory access in wasm are sequentially consistent.
+; CHECK-LABEL: multithread_weak_fence:
+; CHECK: get_global $push{{.+}}=, __stack_pointer
+; CHECK: i32.atomic.rmw.or
+; CHECK: i32.atomic.rmw.or
+; CHECK: i32.atomic.rmw.or
+define void @multithread_weak_fence() {
+  fence acquire
+  fence release
+  fence acq_rel
+  ret void
+}
+
+; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
+; that acts as a compiler barrier. The barrier should not be emitted to .s file.
+; CHECK-LABEL: singlethread_fence:
+; CHECK-NOT: compiler_fence
+define void @singlethread_fence() {
+  fence syncscope("singlethread") seq_cst
+  fence syncscope("singlethread") acquire
+  fence syncscope("singlethread") release
+  fence syncscope("singlethread") acq_rel
+  ret void
+}