Index: include/llvm/CodeGen/GlobalISel/IRTranslator.h =================================================================== --- include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -448,6 +448,7 @@ bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder); bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder); + bool translateFence(const User &U, MachineIRBuilder &MIRBuilder); // Stubs to keep the compiler happy while we implement the rest of the // translation. @@ -463,9 +464,6 @@ bool translateCatchSwitch(const User &U, MachineIRBuilder &MIRBuilder) { return false; } - bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) { - return false; - } bool translateAddrSpaceCast(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_ADDRSPACE_CAST, U, MIRBuilder); } Index: include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h =================================================================== --- include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1136,6 +1136,9 @@ MachineInstrBuilder buildAtomicRMWUmin(Register OldValRes, Register Addr, Register Val, MachineMemOperand &MMO); + /// Build and insert `G_FENCE Ordering, Scope`. + MachineInstrBuilder buildFence(unsigned Ordering, unsigned Scope); + /// Build and insert \p Res = G_BLOCK_ADDR \p BA /// /// G_BLOCK_ADDR computes the address of a basic block. Index: include/llvm/Support/TargetOpcodes.def =================================================================== --- include/llvm/Support/TargetOpcodes.def +++ include/llvm/Support/TargetOpcodes.def @@ -316,6 +316,9 @@ HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMAX) HANDLE_TARGET_OPCODE(G_ATOMICRMW_UMIN) +// Generic atomic fence +HANDLE_TARGET_OPCODE(G_FENCE) + /// Generic conditional branch instruction. 
HANDLE_TARGET_OPCODE(G_BRCOND) Index: include/llvm/Target/GenericOpcodes.td =================================================================== --- include/llvm/Target/GenericOpcodes.td +++ include/llvm/Target/GenericOpcodes.td @@ -743,6 +743,12 @@ def G_ATOMICRMW_UMAX : G_ATOMICRMW_OP; def G_ATOMICRMW_UMIN : G_ATOMICRMW_OP; +def G_FENCE : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$ordering, i32imm:$scope); + let hasSideEffects = 1; +} + //------------------------------------------------------------------------------ // Variadic ops //------------------------------------------------------------------------------ Index: include/llvm/Target/GlobalISel/SelectionDAGCompat.td =================================================================== --- include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -136,6 +136,7 @@ def : GINodeEquiv<G_ATOMICRMW_MAX, atomic_load_max>; def : GINodeEquiv<G_ATOMICRMW_UMIN, atomic_load_umin>; def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax>; +def : GINodeEquiv<G_FENCE, atomic_fence>; // Specifies the GlobalISel equivalents for SelectionDAG's ComplexPattern. // Should be used on defs that subclass GIComplexOperandMatcher<>. 
Index: lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- lib/CodeGen/GlobalISel/IRTranslator.cpp +++ lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2029,6 +2029,14 @@ return true; } +bool IRTranslator::translateFence(const User &U, + MachineIRBuilder &MIRBuilder) { + const FenceInst &Fence = cast<FenceInst>(U); + MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()), + Fence.getSyncScopeID()); + return true; +} + void IRTranslator::finishPendingPhis() { #ifndef NDEBUG DILocationVerifier Verifier; Index: lib/CodeGen/GlobalISel/MachineIRBuilder.cpp =================================================================== --- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -850,6 +850,13 @@ MMO); } +MachineInstrBuilder +MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { + return buildInstr(TargetOpcode::G_FENCE) + .addImm(Ordering) + .addImm(Scope); +} + MachineInstrBuilder MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) { #ifndef NDEBUG Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1180,6 +1180,11 @@ return selectG_BRCOND(I); case TargetOpcode::G_FRAME_INDEX: return selectG_FRAME_INDEX(I); + case TargetOpcode::G_FENCE: + // FIXME: Tablegen importer doesn't handle the imm operands correctly, and + // is checking for G_CONSTANT + I.setDesc(TII.get(AMDGPU::ATOMIC_FENCE)); + return true; } return false; } Index: test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -144,6 +144,9 @@ # DEBUG-NEXT: G_ATOMICRMW_UMIN (opcode {{[0-9]+}}): 2 type indices # DEBUG: .. 
type index coverage check SKIPPED: user-defined predicate detected # +# DEBUG-NEXT: G_FENCE (opcode {{[0-9]+}}): 0 type indices +# DEBUG: .. type index coverage check SKIPPED: no rules defined +# # DEBUG-NEXT: G_BRCOND (opcode {{[0-9]+}}): 1 type index # DEBUG: .. the first uncovered type index: 1, OK # Index: test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -0,0 +1,719 @@ +; RUN: llc -global-isel -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX6,GFX68 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX8,GFX68 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10WGP %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+code-object-v3,+cumode -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX10,GFX10CU %s + +; FUNC-LABEL: {{^}}system_one_as_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0){{$}} +; GFX6-NEXT: buffer_wbinvl1{{$}} +; GFX8: s_waitcnt vmcnt(0){{$}} +; GFX8-NEXT: buffer_wbinvl1_vol{{$}} +; GFX10: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_one_as_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_one_as_acquire() { +entry: + fence syncscope("one-as") acquire + ret void +} + 
+; FUNC-LABEL: {{^}}system_one_as_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_one_as_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_one_as_release() { +entry: + fence syncscope("one-as") release + ret void +} + +; FUNC-LABEL: {{^}}system_one_as_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_one_as_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_one_as_acq_rel() { +entry: + fence syncscope("one-as") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}system_one_as_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_one_as_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_one_as_seq_cst() { +entry: + fence syncscope("one-as") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}singlethread_one_as_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_one_as_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define 
amdgpu_kernel void @singlethread_one_as_acquire() { +entry: + fence syncscope("singlethread-one-as") acquire + ret void +} + +; FUNC-LABEL: {{^}}singlethread_one_as_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_one_as_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_one_as_release() { +entry: + fence syncscope("singlethread-one-as") release + ret void +} + +; FUNC-LABEL: {{^}}singlethread_one_as_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_one_as_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_one_as_acq_rel() { +entry: + fence syncscope("singlethread-one-as") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}singlethread_one_as_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_one_as_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_one_as_seq_cst() { +entry: + fence syncscope("singlethread-one-as") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}agent_one_as_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0){{$}} +; GFX6-NEXT: buffer_wbinvl1{{$}} +; GFX8: s_waitcnt vmcnt(0){{$}} +; GFX8-NEXT: buffer_wbinvl1_vol{{$}} +; GFX10: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_one_as_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define 
amdgpu_kernel void @agent_one_as_acquire() { +entry: + fence syncscope("agent-one-as") acquire + ret void +} + +; FUNC-LABEL: {{^}}agent_one_as_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_one_as_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_one_as_release() { +entry: + fence syncscope("agent-one-as") release + ret void +} + +; FUNC-LABEL: {{^}}agent_one_as_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_one_as_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_one_as_acq_rel() { +entry: + fence syncscope("agent-one-as") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}agent_one_as_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_waitcnt vmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_one_as_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_one_as_seq_cst() { +entry: + fence syncscope("agent-one-as") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}workgroup_one_as_acquire: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; 
GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10WGP-NEXT: buffer_gl0_inv{{$}} +; GFX10CU-NOT: buffer_gl0_inv{{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_one_as_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_one_as_acquire() { +entry: + fence syncscope("workgroup-one-as") acquire + ret void +} + +; FUNC-LABEL: {{^}}workgroup_one_as_release: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10-NOT: buffer_gl0_inv +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_one_as_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_one_as_release() { +entry: + fence syncscope("workgroup-one-as") release + ret void +} + +; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10WGP-NEXT: buffer_gl0_inv{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: buffer_gl0_inv{{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_one_as_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_one_as_acq_rel() { +entry: + fence syncscope("workgroup-one-as") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt 
vmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10WGP-NEXT: buffer_gl0_inv{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: buffer_gl0_inv{{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_one_as_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_one_as_seq_cst() { +entry: + fence syncscope("workgroup-one-as") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}wavefront_one_as_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_one_as_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_one_as_acquire() { +entry: + fence syncscope("wavefront-one-as") acquire + ret void +} + +; FUNC-LABEL: {{^}}wavefront_one_as_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_one_as_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_one_as_release() { +entry: + fence syncscope("wavefront-one-as") release + ret void +} + +; FUNC-LABEL: {{^}}wavefront_one_as_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_one_as_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_one_as_acq_rel() { +entry: + fence syncscope("wavefront-one-as") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}wavefront_one_as_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_one_as_seq_cst +; 
GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_one_as_seq_cst() { +entry: + fence syncscope("wavefront-one-as") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}system_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX6-NEXT: buffer_wbinvl1{{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8-NEXT: buffer_wbinvl1_vol{{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_acquire() { +entry: + fence acquire + ret void +} + +; FUNC-LABEL: {{^}}system_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_release() { +entry: + fence release + ret void +} + +; FUNC-LABEL: {{^}}system_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: 
.amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_acq_rel() { +entry: + fence acq_rel + ret void +} + +; FUNC-LABEL: {{^}}system_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel system_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @system_seq_cst() { +entry: + fence seq_cst + ret void +} + +; FUNC-LABEL: {{^}}singlethread_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_acquire() { +entry: + fence syncscope("singlethread") acquire + ret void +} + +; FUNC-LABEL: {{^}}singlethread_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_release() { +entry: + fence syncscope("singlethread") release + ret void +} + +; FUNC-LABEL: {{^}}singlethread_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_acq_rel() { +entry: + fence 
syncscope("singlethread") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}singlethread_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel singlethread_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @singlethread_seq_cst() { +entry: + fence syncscope("singlethread") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}agent_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX6-NEXT: buffer_wbinvl1{{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8-NEXT: buffer_wbinvl1_vol{{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_acquire() { +entry: + fence syncscope("agent") acquire + ret void +} + +; FUNC-LABEL: {{^}}agent_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_release() { +entry: + fence syncscope("agent") release + ret void +} + +; FUNC-LABEL: {{^}}agent_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: 
buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_acq_rel() { +entry: + fence syncscope("agent") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}agent_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GFX6: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX8: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX6: buffer_wbinvl1{{$}} +; GFX8: buffer_wbinvl1_vol{{$}} +; GFX10-NEXT: buffer_gl0_inv{{$}} +; GFX10-NEXT: buffer_gl1_inv{{$}} +; GCN: s_endpgm +; GFX10: .amdhsa_kernel agent_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @agent_seq_cst() { +entry: + fence syncscope("agent") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}workgroup_acquire: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10WGP-NEXT: buffer_gl0_inv{{$}} +; GFX10CU-NOT: buffer_gl0_inv{{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_acquire() { +entry: + fence syncscope("workgroup") acquire + ret void +} + +; FUNC-LABEL: {{^}}workgroup_release: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; 
GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10-NOT: buffer_gl0_inv +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_release() { +entry: + fence syncscope("workgroup") release + ret void +} + +; FUNC-LABEL: {{^}}workgroup_acq_rel: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10WGP-NEXT: buffer_gl0_inv{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: buffer_gl0_inv{{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_acq_rel() { +entry: + fence syncscope("workgroup") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}workgroup_seq_cst: +; GCN: %bb.0 +; GFX68-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10WGP: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GFX10WGP-NEXT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10WGP-NEXT: buffer_gl0_inv{{$}} +; GFX10CU-NOT: s_waitcnt vmcnt(0){{$}} +; GFX10CU-NOT: s_waitcnt_vscnt null, 0x0{{$}} +; GFX10CU-NOT: buffer_gl0_inv{{$}} +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel workgroup_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @workgroup_seq_cst() { +entry: + fence syncscope("workgroup") seq_cst + ret void +} + +; FUNC-LABEL: {{^}}wavefront_acquire: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_acquire +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: 
.amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_acquire() { +entry: + fence syncscope("wavefront") acquire + ret void +} + +; FUNC-LABEL: {{^}}wavefront_release: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_release +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_release() { +entry: + fence syncscope("wavefront") release + ret void +} + +; FUNC-LABEL: {{^}}wavefront_acq_rel: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_acq_rel +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_acq_rel() { +entry: + fence syncscope("wavefront") acq_rel + ret void +} + +; FUNC-LABEL: {{^}}wavefront_seq_cst: +; GCN: %bb.0 +; GCN-NOT: ATOMIC_FENCE +; GCN: s_endpgm +; GFX10: .amdhsa_kernel wavefront_seq_cst +; GFX10WGP-NOT: .amdhsa_workgroup_processor_mode 0 +; GFX10CU: .amdhsa_workgroup_processor_mode 0 +; GFX10-NOT: .amdhsa_memory_ordered 0 +define amdgpu_kernel void @wavefront_seq_cst() { +entry: + fence syncscope("wavefront") seq_cst + ret void +}