Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -43,6 +43,7 @@
 FunctionPass *createSIWholeQuadModePass();
 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
 FunctionPass *createSIFixSGPRCopiesPass();
+FunctionPass *createSIMemoryLegalizerPass();
 FunctionPass *createSIDebuggerInsertNopsPass();
 FunctionPass *createSIInsertWaitsPass();
 FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
@@ -98,6 +99,9 @@
 void initializeSIAnnotateControlFlowPass(PassRegistry&);
 extern char &SIAnnotateControlFlowPassID;
 
+void initializeSIMemoryLegalizerPass(PassRegistry&);
+extern char &SIMemoryLegalizerID;
+
 void initializeSIDebuggerInsertNopsPass(PassRegistry&);
 extern char &SIDebuggerInsertNopsID;
 
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -82,6 +82,7 @@
   initializeSIWholeQuadModePass(*PR);
   initializeSILowerControlFlowPass(*PR);
   initializeSIInsertSkipsPass(*PR);
+  initializeSIMemoryLegalizerPass(*PR);
   initializeSIDebuggerInsertNopsPass(*PR);
 }
 
@@ -590,6 +591,7 @@
   addPass(createSIInsertWaitsPass());
   addPass(createSIShrinkInstructionsPass());
   addPass(&SIInsertSkipsPassID);
+  addPass(createSIMemoryLegalizerPass());
   addPass(createSIDebuggerInsertNopsPass());
 }
 
Index: lib/Target/AMDGPU/BUFInstructions.td
===================================================================
--- lib/Target/AMDGPU/BUFInstructions.td
+++ lib/Target/AMDGPU/BUFInstructions.td
@@ -929,12 +929,12 @@
 
   def : Pat <
     (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                 i16:$offset, i1:$slc))),
-    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
   >;
 
   def : Pat <
     (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
   >;
 }
 
@@ -1015,12 +1015,12 @@
 
   def : Pat <
     (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                             i16:$offset, i1:$slc), vt:$val),
-    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0)
   >;
 
   def : Pat <
     (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
   >;
 }
 
 let Predicates = [isSICI] in {
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -77,6 +77,7 @@
   SILowerI1Copies.cpp
   SIMachineFunctionInfo.cpp
   SIMachineScheduler.cpp
+  SIMemoryLegalizer.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
   SITypeRewriter.cpp
Index: lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- lib/Target/AMDGPU/FLATInstructions.td
+++ lib/Target/AMDGPU/FLATInstructions.td
@@ -316,7 +316,7 @@
 
 class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
   (vt (node i64:$addr)),
-  (inst $addr, 1, 0, 0)
+  (inst $addr, 0, 0, 0)
 >;
 
 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
@@ -328,7 +328,7 @@
   // atomic store follows atomic binop convention so the address comes
   // first.
   (node i64:$addr, vt:$data),
-  (inst $addr, $data, 1, 0, 0)
+  (inst $addr, $data, 0, 0, 0)
 >;
 
 class FlatAtomicPat <
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ ... @@
+def ATOMIC_FENCE : PseudoInstSI<
+  (outs), (ins i32imm:$ordering, i32imm:$scope),
+  [(atomic_fence (i32 imm:$ordering), (i32 imm:$scope))]> {
+  let hasSideEffects = 1;
+  let isCodeGenOnly = 1;
+  let isPseudo = 1;
+  let SALU = 1;
+}
+
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
 // For use in patterns
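Note on the pseudo above: a `fence` instruction survives instruction selection as this target pseudo, carrying its atomic ordering and synchronization scope as two immediate operands, and the new pass below is what finally expands and deletes it. A minimal sketch of the operand convention the pass relies on (the helper name is illustrative only, not part of the patch):

// Illustrative sketch -- not part of this patch. ATOMIC_FENCE carries two
// immediates: operand 0 is the LLVM AtomicOrdering, operand 1 is the AMDGPU
// synchronization scope. SIMemoryLegalizer::ExpandAtomicFence reads them
// exactly like this before queueing the pseudo for deletion.
static std::pair<AtomicOrdering, AMDGPUSynchronizationScope>
decodeAtomicFence(const MachineInstr &MI) {
  assert(MI.getOpcode() == AMDGPU::ATOMIC_FENCE && "expected ATOMIC_FENCE");
  return {static_cast<AtomicOrdering>(MI.getOperand(0).getImm()),
          static_cast<AMDGPUSynchronizationScope>(MI.getOperand(1).getImm())};
}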
Index: lib/Target/AMDGPU/SIMemoryLegalizer.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -0,0 +1,454 @@
+//===--- SIMemoryLegalizer.cpp - Legalizes memory operations --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Legalizes memory operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include <list>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-memory-legalizer"
+#define PASS_NAME "SI Memory Legalizer"
+
+namespace {
+
+class SIMemoryLegalizer final : public MachineFunctionPass {
+private:
+  /// \brief Immediate for "vmcnt(0)".
+  static const unsigned Vmcnt0;
+
+  /// \brief Target instruction info.
+  const SIInstrInfo *TII;
+  /// \brief LLVM context.
+  LLVMContext *CTX;
+  /// \brief List of atomic pseudo machine instructions.
+  std::list<MachineBasicBlock::iterator> AtomicPseudoMI;
+
+  /// \brief Inserts "buffer_wbinvl1_vol" instruction before \p MI. Always
+  /// returns true.
+  bool InsertBufferWbinvl1Vol(const MachineBasicBlock::iterator &MI) const;
+  /// \brief Inserts "s_waitcnt vmcnt(0)" instruction before \p MI. Always
+  /// returns true.
+  bool InsertWaitcntVmcnt0(const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
+  /// modified, false otherwise.
+  bool SetGLC(const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Removes all processed atomic pseudo machine instructions from the
+  /// current function. Returns true if current function is modified, false
+  /// otherwise.
+  bool RemoveAtomicPseudoMI();
+
+  /// \brief Reports unknown synchronization scope used in \p MI to LLVM
+  /// context.
+  void ReportUnknownSynchScope(const MachineBasicBlock::iterator &MI);
+
+  /// \returns True if \p MI is atomic fence operation, false otherwise.
+  bool IsAtomicFence(const MachineBasicBlock::iterator &MI) const;
+  /// \returns True if \p MI is atomic load operation, false otherwise.
+  bool IsAtomicLoad(const MachineBasicBlock::iterator &MI) const;
+  /// \returns True if \p MI is atomic store operation, false otherwise.
+  bool IsAtomicStore(const MachineBasicBlock::iterator &MI) const;
+  /// \returns True if \p MI is atomic cmpxchg operation, false otherwise.
+  bool IsAtomicCmpxchg(const MachineBasicBlock::iterator &MI) const;
+  /// \returns True if \p MI is atomic rmw operation, false otherwise.
+  bool IsAtomicRmw(const MachineBasicBlock::iterator &MI) const;
+
+  /// \brief Expands atomic fence operation. Returns true if instructions are
+  /// added/deleted or \p MI is modified, false otherwise.
+  bool ExpandAtomicFence(MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic load operation. Returns true if instructions are
+  /// added/deleted or \p MI is modified, false otherwise.
+  bool ExpandAtomicLoad(MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic store operation. Returns true if instructions are
+  /// added/deleted or \p MI is modified, false otherwise.
+  bool ExpandAtomicStore(MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic cmpxchg operation. Returns true if instructions are
+  /// added/deleted or \p MI is modified, false otherwise.
+  bool ExpandAtomicCmpxchg(MachineBasicBlock::iterator &MI);
+  /// \brief Expands atomic rmw operation. Returns true if instructions are
+  /// added/deleted or \p MI is modified, false otherwise.
+  bool ExpandAtomicRmw(MachineBasicBlock::iterator &MI);
+
+public:
+  static char ID;
+
+  SIMemoryLegalizer()
+      : MachineFunctionPass(ID), TII(nullptr), CTX(nullptr) {}
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  const char *getPassName() const override {
+    return PASS_NAME;
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // anonymous namespace
+
+INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
+
+const unsigned SIMemoryLegalizer::Vmcnt0 = 0x7 << 4 | 0xF << 8;
+char SIMemoryLegalizer::ID = 0;
+char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
+
+FunctionPass *llvm::createSIMemoryLegalizerPass() {
+  return new SIMemoryLegalizer();
+}
+
+bool SIMemoryLegalizer::InsertBufferWbinvl1Vol(
+    const MachineBasicBlock::iterator &MI) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1_VOL));
+  return true;
+}
+
+bool SIMemoryLegalizer::InsertWaitcntVmcnt0(
+    const MachineBasicBlock::iterator &MI) const {
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0);
+  return true;
+}
+
+bool SIMemoryLegalizer::SetGLC(const MachineBasicBlock::iterator &MI) const {
+  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
+  if (GLCIdx == -1)
+    return false;
+
+  MachineOperand &GLC = MI->getOperand(GLCIdx);
+  if (GLC.getImm() == 1)
+    return false;
+
+  GLC.setImm(1);
+  return true;
+}
+
+bool SIMemoryLegalizer::RemoveAtomicPseudoMI() {
+  if (AtomicPseudoMI.empty())
+    return false;
+
+  for (auto &MI : AtomicPseudoMI)
+    MI->eraseFromParent();
+
+  AtomicPseudoMI.clear();
+  return true;
+}
+
+void SIMemoryLegalizer::ReportUnknownSynchScope(
+    const MachineBasicBlock::iterator &MI) {
+  DiagnosticInfoUnsupported Diag(
+      *MI->getParent()->getParent()->getFunction(),
+      "Unknown synchronization scope");
+  CTX->diagnose(Diag);
+}
+
+bool SIMemoryLegalizer::IsAtomicFence(
+    const MachineBasicBlock::iterator &MI) const {
+  return MI->getOpcode() == AMDGPU::ATOMIC_FENCE;
+}
+
+bool SIMemoryLegalizer::IsAtomicLoad(
+    const MachineBasicBlock::iterator &MI) const {
+  if (!MI->hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  return MMO->isAtomic() && MMO->isLoad() && !MMO->isStore() &&
+         MMO->getFailureOrdering() == AtomicOrdering::NotAtomic;
+}
+
+bool SIMemoryLegalizer::IsAtomicStore(
+    const MachineBasicBlock::iterator &MI) const {
+  if (!MI->hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  return MMO->isAtomic() && !MMO->isLoad() && MMO->isStore() &&
+         MMO->getFailureOrdering() == AtomicOrdering::NotAtomic;
+}
+
+bool SIMemoryLegalizer::IsAtomicCmpxchg(
+    const MachineBasicBlock::iterator &MI) const {
+  if (!MI->hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  return MMO->isAtomic() && MMO->isLoad() && MMO->isStore() &&
+         MMO->getFailureOrdering() != AtomicOrdering::NotAtomic;
+}
+
+bool SIMemoryLegalizer::IsAtomicRmw(
+    const MachineBasicBlock::iterator &MI) const {
+  if (!MI->hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  return MMO->isAtomic() && MMO->isLoad() && MMO->isStore() &&
+         MMO->getFailureOrdering() == AtomicOrdering::NotAtomic;
+}
+
+bool SIMemoryLegalizer::ExpandAtomicFence(MachineBasicBlock::iterator &MI) {
+  assert(IsAtomicFence(MI) && "Must be atomic fence");
+
+  bool Changed = false;
+
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
+  AMDGPUSynchronizationScope SynchScope =
+      static_cast<AMDGPUSynchronizationScope>(MI->getOperand(1).getImm());
+
+  switch (SynchScope) {
+  case AMDGPUSynchronizationScope::System:
+  case AMDGPUSynchronizationScope::Agent: {
+    if (Ordering == AtomicOrdering::Release ||
+        Ordering == AtomicOrdering::AcquireRelease ||
+        Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= InsertWaitcntVmcnt0(MI);
+
+    if (Ordering == AtomicOrdering::Acquire ||
+        Ordering == AtomicOrdering::AcquireRelease ||
+        Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= InsertBufferWbinvl1Vol(MI);
+
+    break;
+  }
+  case AMDGPUSynchronizationScope::WorkGroup:
+  case AMDGPUSynchronizationScope::Wavefront:
+  case AMDGPUSynchronizationScope::Image:
+  case AMDGPUSynchronizationScope::SignalHandler: {
+    break;
+  }
+  default: {
+    ReportUnknownSynchScope(MI);
+    break;
+  }
+  }
+
+  AtomicPseudoMI.push_back(MI);
+  return Changed;
+}
+
+bool SIMemoryLegalizer::ExpandAtomicLoad(MachineBasicBlock::iterator &MI) {
+  assert(IsAtomicLoad(MI) && "Must be atomic load");
+
+  bool Changed = false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  AtomicOrdering Ordering = MMO->getOrdering();
+  AMDGPUSynchronizationScope SynchScope =
+      static_cast<AMDGPUSynchronizationScope>(MMO->getSynchScope());
+
+  switch (SynchScope) {
+  case AMDGPUSynchronizationScope::System:
+  case AMDGPUSynchronizationScope::Agent: {
+    if (Ordering == AtomicOrdering::Monotonic ||
+        Ordering == AtomicOrdering::Acquire ||
+        Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= SetGLC(MI);
+
+    if (Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= InsertWaitcntVmcnt0(MI);
+
+    if (Ordering == AtomicOrdering::Acquire ||
+        Ordering == AtomicOrdering::SequentiallyConsistent) {
+      ++MI;
+      Changed |= InsertWaitcntVmcnt0(MI);
+      Changed |= InsertBufferWbinvl1Vol(MI);
+      --MI;
+    }
+
+    break;
+  }
+  case AMDGPUSynchronizationScope::WorkGroup:
+  case AMDGPUSynchronizationScope::Wavefront:
+  case AMDGPUSynchronizationScope::Image:
+  case AMDGPUSynchronizationScope::SignalHandler: {
+    break;
+  }
+  default: {
+    ReportUnknownSynchScope(MI);
+    break;
+  }
+  }
+
+  return Changed;
+}
+
+bool SIMemoryLegalizer::ExpandAtomicStore(MachineBasicBlock::iterator &MI) {
+  assert(IsAtomicStore(MI) && "Must be atomic store");
+
+  bool Changed = false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  AtomicOrdering Ordering = MMO->getOrdering();
+  AMDGPUSynchronizationScope SynchScope =
+      static_cast<AMDGPUSynchronizationScope>(MMO->getSynchScope());
+
+  switch (SynchScope) {
+  case AMDGPUSynchronizationScope::System:
+  case AMDGPUSynchronizationScope::Agent: {
+    if (Ordering == AtomicOrdering::Release ||
+        Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= InsertWaitcntVmcnt0(MI);
+
+    break;
+  }
+  case AMDGPUSynchronizationScope::WorkGroup:
+  case AMDGPUSynchronizationScope::Wavefront:
+  case AMDGPUSynchronizationScope::Image:
+  case AMDGPUSynchronizationScope::SignalHandler: {
+    break;
+  }
+  default: {
+    ReportUnknownSynchScope(MI);
+    break;
+  }
+  }
+
+  return Changed;
+}
+
+bool SIMemoryLegalizer::ExpandAtomicCmpxchg(MachineBasicBlock::iterator &MI) {
+  assert(IsAtomicCmpxchg(MI) && "Must be atomic cmpxchg");
+
+  bool Changed = false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  AtomicOrdering SuccessOrdering = MMO->getSuccessOrdering();
+  AtomicOrdering FailureOrdering = MMO->getFailureOrdering();
+  AMDGPUSynchronizationScope SynchScope =
+      static_cast<AMDGPUSynchronizationScope>(MMO->getSynchScope());
+
+  switch (SynchScope) {
+  case AMDGPUSynchronizationScope::System:
+  case AMDGPUSynchronizationScope::Agent: {
+    Changed |= SetGLC(MI);
+
+    if (SuccessOrdering == AtomicOrdering::Release ||
+        SuccessOrdering == AtomicOrdering::AcquireRelease ||
+        SuccessOrdering == AtomicOrdering::SequentiallyConsistent ||
+        FailureOrdering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= InsertWaitcntVmcnt0(MI);
+
+    if (SuccessOrdering == AtomicOrdering::Acquire ||
+        SuccessOrdering == AtomicOrdering::AcquireRelease ||
+        SuccessOrdering == AtomicOrdering::SequentiallyConsistent ||
+        FailureOrdering == AtomicOrdering::Acquire ||
+        FailureOrdering == AtomicOrdering::SequentiallyConsistent) {
+      ++MI;
+      Changed |= InsertWaitcntVmcnt0(MI);
+      Changed |= InsertBufferWbinvl1Vol(MI);
+      --MI;
+    }
+
+    break;
+  }
+  case AMDGPUSynchronizationScope::WorkGroup:
+  case AMDGPUSynchronizationScope::Wavefront:
+  case AMDGPUSynchronizationScope::Image:
+  case AMDGPUSynchronizationScope::SignalHandler: {
+    Changed |= SetGLC(MI);
+    break;
+  }
+  default: {
+    ReportUnknownSynchScope(MI);
+    break;
+  }
+  }
+
+  return Changed;
+}
+
+bool SIMemoryLegalizer::ExpandAtomicRmw(MachineBasicBlock::iterator &MI) {
+  assert(IsAtomicRmw(MI) && "Must be atomic rmw");
+
+  bool Changed = false;
+
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
+  AtomicOrdering Ordering = MMO->getOrdering();
+  AMDGPUSynchronizationScope SynchScope =
+      static_cast<AMDGPUSynchronizationScope>(MMO->getSynchScope());
+
+  switch (SynchScope) {
+  case AMDGPUSynchronizationScope::System:
+  case AMDGPUSynchronizationScope::Agent: {
+    Changed |= SetGLC(MI);
+
+    if (Ordering == AtomicOrdering::Release ||
+        Ordering == AtomicOrdering::AcquireRelease ||
+        Ordering == AtomicOrdering::SequentiallyConsistent)
+      Changed |= InsertWaitcntVmcnt0(MI);
+
+    if (Ordering == AtomicOrdering::Acquire ||
+        Ordering == AtomicOrdering::AcquireRelease ||
+        Ordering == AtomicOrdering::SequentiallyConsistent) {
+      ++MI;
+      Changed |= InsertWaitcntVmcnt0(MI);
+      Changed |= InsertBufferWbinvl1Vol(MI);
+      --MI;
+    }
+
+    break;
+  }
+  case AMDGPUSynchronizationScope::WorkGroup:
+  case AMDGPUSynchronizationScope::Wavefront:
+  case AMDGPUSynchronizationScope::Image:
+  case AMDGPUSynchronizationScope::SignalHandler: {
+    Changed |= SetGLC(MI);
+    break;
+  }
+  default: {
+    ReportUnknownSynchScope(MI);
+    break;
+  }
+  }
+
+  return Changed;
+}
+
+bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+
+  TII = MF.getSubtarget<SISubtarget>().getInstrInfo();
+  CTX = &MF.getFunction()->getContext();
+
+  for (auto &MBB : MF) {
+    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      if (IsAtomicFence(MI))
+        Changed |= ExpandAtomicFence(MI);
+      else if (IsAtomicLoad(MI))
+        Changed |= ExpandAtomicLoad(MI);
+      else if (IsAtomicStore(MI))
+        Changed |= ExpandAtomicStore(MI);
+      else if (IsAtomicCmpxchg(MI))
+        Changed |= ExpandAtomicCmpxchg(MI);
+      else if (IsAtomicRmw(MI))
+        Changed |= ExpandAtomicRmw(MI);
+    }
+  }
+
+  Changed |= RemoveAtomicPseudoMI();
+  return Changed;
+}
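For reference, the Vmcnt0 constant above decodes as follows: on SI/CI/VI the s_waitcnt immediate packs vmcnt into bits 3:0, expcnt into bits 6:4 and lgkmcnt into bits 11:8, and a counter is effectively not waited on when its field is left at the maximum value. A small sketch (the helper name is illustrative, not part of the patch):

// Illustrative sketch -- not part of this patch.
// s_waitcnt immediate layout on SI/CI/VI: vmcnt[3:0], expcnt[6:4], lgkmcnt[11:8].
constexpr unsigned encodeWaitcnt(unsigned Vmcnt, unsigned Expcnt,
                                 unsigned Lgkmcnt) {
  return (Vmcnt & 0xF) | ((Expcnt & 0x7) << 4) | ((Lgkmcnt & 0xF) << 8);
}
// "vmcnt(0)" waits for all outstanding VMEM operations while leaving expcnt
// and lgkmcnt at their maxima so they are ignored -- exactly Vmcnt0 above.
static_assert(encodeWaitcnt(0, 0x7, 0xF) == (0x7 << 4 | 0xF << 8),
              "s_waitcnt vmcnt(0)");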
Index: test/CodeGen/AMDGPU/flat_atomics.ll
===================================================================
--- test/CodeGen/AMDGPU/flat_atomics.ll
+++ test/CodeGen/AMDGPU/flat_atomics.ll
@@ -899,7 +899,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_load_i32:
-; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
 define void @atomic_load_i32(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
 entry:
@@ -932,7 +932,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32_offset(i32 %in, i32 addrspace(4)* %out) {
 entry:
   %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
@@ -941,7 +941,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i32:
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32(i32 %in, i32 addrspace(4)* %out) {
 entry:
   store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
@@ -949,7 +949,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(4)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
@@ -959,7 +959,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
-; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(4)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(4)* %out, i64 %index
Index: test/CodeGen/AMDGPU/flat_atomics_i64.ll
===================================================================
--- test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -813,7 +813,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_load_i64:
-; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
 define void @atomic_load_i64(i64 addrspace(4)* %in, i64 addrspace(4)* %out) {
 entry:
@@ -846,7 +846,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i64_offset:
-; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
 define void @atomic_store_i64_offset(i64 %in, i64 addrspace(4)* %out) {
 entry:
   %gep = getelementptr i64, i64 addrspace(4)* %out, i64 4
@@ -855,7 +855,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i64:
-; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
 define void @atomic_store_i64(i64 %in, i64 addrspace(4)* %out) {
 entry:
   store atomic i64 %in, i64 addrspace(4)* %out seq_cst, align 8
@@ -863,7 +863,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
-; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
 define void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(4)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
@@ -873,7 +873,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_store_i64_addr64:
-; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
 define void @atomic_store_i64_addr64(i64 %in, i64 addrspace(4)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i64, i64 addrspace(4)* %out, i64 %index
Index: test/CodeGen/AMDGPU/global_atomics.ll
===================================================================
--- test/CodeGen/AMDGPU/global_atomics.ll
+++ test/CodeGen/AMDGPU/global_atomics.ll
@@ -968,8 +968,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i32:
-; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
 entry:
@@ -1004,8 +1004,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32_offset:
-; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
-; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
@@ -1014,8 +1014,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32:
-; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
-; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
 entry:
   store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
@@ -1023,8 +1023,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
-; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
@@ -1034,8 +1034,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
-; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
 define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
Index: test/CodeGen/AMDGPU/global_atomics_i64.ll
===================================================================
--- test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -961,8 +961,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i64:
-; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
 entry:
@@ -997,8 +997,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64_offset:
-; CI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
-; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
 define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -1007,8 +1007,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64:
-; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
-; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
+; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
 define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
 entry:
   store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
@@ -1016,8 +1016,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64_addr64_offset:
-; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
-; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
 define void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
@@ -1027,8 +1027,8 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64_addr64:
-; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
 define void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
 entry:
   %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
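The three new test files that follow pin down the expansion rules of the pass: at system and agent scope, release-or-stronger orderings get an s_waitcnt vmcnt(0) before the memory operation, acquire-or-stronger orderings get s_waitcnt vmcnt(0) plus buffer_wbinvl1_vol after it, and all narrower scopes only set the GLC bit where a result is returned. A compact restatement for cmpxchg (a sketch only, using the helpers from llvm/Support/AtomicOrdering.h; the switch statements in the pass above are authoritative):

// Illustrative sketch -- a condensed form of ExpandAtomicCmpxchg above, for
// the System/Agent scopes. All narrower scopes merely set the GLC bit.
struct Expansion {
  bool WaitBefore; // s_waitcnt vmcnt(0) before the instruction
  bool InvAfter;   // s_waitcnt vmcnt(0) + buffer_wbinvl1_vol after it
};
static Expansion expandCmpxchg(AtomicOrdering Success, AtomicOrdering Failure) {
  Expansion E;
  E.WaitBefore = isReleaseOrStronger(Success) ||
                 Failure == AtomicOrdering::SequentiallyConsistent;
  E.InvAfter = isAcquireOrStronger(Success) || isAcquireOrStronger(Failure);
  return E;
}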
Index: test/CodeGen/AMDGPU/memory-model-atomic-cmpxchg.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-model-atomic-cmpxchg.ll
@@ -0,0 +1,661 @@
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}system_monotonic_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @system_monotonic_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acquire_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_acquire_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_release_monotonic
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @system_release_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acq_rel_monotonic
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_acq_rel_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_seq_cst_monotonic
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_seq_cst_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acquire_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_acquire_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acquire acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_release_acquire
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_release_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in release acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acq_rel_acquire
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_acq_rel_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in acq_rel acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_seq_cst_acquire
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_seq_cst_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_seq_cst_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_seq_cst_seq_cst(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in seq_cst seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_monotonic_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @agent_monotonic_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acquire_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_acquire_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) acquire monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_release_monotonic
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @agent_release_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) release monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acq_rel_monotonic
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_acq_rel_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) acq_rel monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_seq_cst_monotonic
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_seq_cst_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) seq_cst monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acquire_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_acquire_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) acquire acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_release_acquire
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_release_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) release acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acq_rel_acquire
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_acq_rel_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) acq_rel acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_seq_cst_acquire
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_seq_cst_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) seq_cst acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_seq_cst_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_seq_cst_seq_cst(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(2) seq_cst seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_monotonic_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_monotonic_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acquire_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_acquire_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) acquire monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_release_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_release_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) release monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acq_rel_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_acq_rel_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) acq_rel monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_seq_cst_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) seq_cst monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acquire_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_acquire_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) acquire acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_release_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_release_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) release acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acq_rel_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_acq_rel_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) acq_rel acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_seq_cst_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) seq_cst acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_seq_cst_seq_cst(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(3) seq_cst seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_monotonic_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_monotonic_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acquire_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_acquire_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) acquire monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_release_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_release_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) release monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acq_rel_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_acq_rel_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) acq_rel monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_seq_cst_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) seq_cst monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acquire_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_acquire_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) acquire acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_release_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_release_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) release acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acq_rel_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_acq_rel_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) acq_rel acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_seq_cst_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) seq_cst acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_seq_cst_seq_cst(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(4) seq_cst seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_monotonic_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_monotonic_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acquire_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_acquire_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) acquire monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_release_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_release_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) release monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acq_rel_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_acq_rel_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) acq_rel monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_seq_cst_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) seq_cst monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acquire_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_acquire_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) acquire acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_release_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_release_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) release acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acq_rel_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_acq_rel_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) acq_rel acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_seq_cst_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) seq_cst acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_seq_cst_seq_cst(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(5) seq_cst seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_monotonic_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_monotonic_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread monotonic monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acquire_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_acquire_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread acquire monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_release_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_release_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread release monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acq_rel_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_acq_rel_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread acq_rel monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_seq_cst_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_seq_cst_monotonic(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread seq_cst monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acquire_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_acquire_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread acquire acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_release_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_release_acquire(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in singlethread release acquire
+  ret void
+}
+define void @work_group_acq_rel() {
+  fence syncscope(3) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @work_group_seq_cst() {
+  fence syncscope(3) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acquire
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @wavefront_acquire() {
+  fence syncscope(4) acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_release
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @wavefront_release() {
+  fence syncscope(4) release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acq_rel
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @wavefront_acq_rel() {
+  fence syncscope(4) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @wavefront_seq_cst() {
+  fence syncscope(4) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acquire
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @image_acquire() {
+  fence syncscope(5) acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_release
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @image_release() {
+  fence syncscope(5) release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acq_rel
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @image_acq_rel() {
+  fence syncscope(5) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @image_seq_cst() {
+  fence syncscope(5) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acquire
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @signal_handler_acquire() {
+  fence singlethread acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_release
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @signal_handler_release() {
+  fence singlethread release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acq_rel
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @signal_handler_acq_rel() {
+  fence singlethread acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_seq_cst
+; CHECK: BB#0
+; CHECK-NEXT: s_endpgm
+define void @signal_handler_seq_cst() {
+  fence singlethread seq_cst
+  ret void
+}
Index: test/CodeGen/AMDGPU/memory-model-atomic-load.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-model-atomic-load.ll
@@ -0,0 +1,289 @@
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}system_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @system_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @system_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @system_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @system_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @agent_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(2) unordered, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @agent_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(2) monotonic, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @agent_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(2) acquire, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @agent_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(2) seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @work_group_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(3) unordered, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @work_group_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(3) monotonic, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @work_group_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(3) acquire, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @work_group_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(3) seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @wavefront_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(4) unordered, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @wavefront_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(4) monotonic, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @wavefront_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(4) acquire, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @wavefront_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(4) seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @image_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(5) unordered, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @image_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(5) monotonic, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @image_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(5) acquire, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @image_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(5) seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @signal_handler_unordered(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in singlethread unordered, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @signal_handler_monotonic(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in singlethread monotonic, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @signal_handler_acquire(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in singlethread acquire, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define void @signal_handler_seq_cst(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in singlethread seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
Index: test/CodeGen/AMDGPU/memory-model-atomic-rmw.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-model-atomic-rmw.ll
@@ -0,0 +1,301 @@
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}system_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @system_monotonic(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_acquire(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_release
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @system_release(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_acq_rel
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_acq_rel(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @system_seq_cst(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @agent_monotonic(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(2) monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_acquire(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(2) acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_release
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @agent_release(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(2) release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_acq_rel
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_acq_rel(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(2) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: buffer_wbinvl1_vol
+define void @agent_seq_cst(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(2) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_monotonic(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(3) monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_acquire(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(3) acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_release(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(3) release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_acq_rel
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_acq_rel(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(3) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @work_group_seq_cst(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(3) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_monotonic(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(4) monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_acquire(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(4) acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_release(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(4) release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_acq_rel
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_acq_rel(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(4) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @wavefront_seq_cst(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(4) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_monotonic(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(5) monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_acquire(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(5) acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_release(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(5) release
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_acq_rel
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_acq_rel(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(5) acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @image_seq_cst(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(5) seq_cst
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_monotonic(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in singlethread monotonic
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acquire
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_acquire(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in singlethread acquire
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_release(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in singlethread release
  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_acq_rel
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_acq_rel(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in singlethread acq_rel
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_atomic_swap v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}{{$}}
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK-NOT: buffer_wbinvl1_vol
+define void @signal_handler_seq_cst(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in singlethread seq_cst
+  ret void
+}
Index: test/CodeGen/AMDGPU/memory-model-atomic-store.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-model-atomic-store.ll
@@ -0,0 +1,193 @@
+; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}system_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @system_unordered(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @system_monotonic(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_release
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @system_release(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out release, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}system_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @system_seq_cst(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @agent_unordered(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(2) unordered, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @agent_monotonic(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(2) monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_release
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @agent_release(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(2) release, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}agent_seq_cst
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @agent_seq_cst(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(2) seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @work_group_unordered(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(3) unordered, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @work_group_monotonic(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(3) monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @work_group_release(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(3) release, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}work_group_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @work_group_seq_cst(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(3) seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @wavefront_unordered(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(4) unordered, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @wavefront_monotonic(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(4) monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @wavefront_release(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(4) release, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}wavefront_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @wavefront_seq_cst(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(4) seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @image_unordered(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(5) unordered, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @image_monotonic(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(5) monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @image_release(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(5) release, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}image_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @image_seq_cst(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(5) seq_cst, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_unordered
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @signal_handler_unordered(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out singlethread unordered, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_monotonic
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @signal_handler_monotonic(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out singlethread monotonic, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_release
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @signal_handler_release(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out singlethread release, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}signal_handler_seq_cst
+; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define void @signal_handler_seq_cst(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out singlethread seq_cst, align 4
+  ret void
+}
Index: test/CodeGen/AMDGPU/memory-model-invalid-synch-scope.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-model-invalid-synch-scope.ll
@@ -0,0 +1,33 @@
+; RUN: not llc -mtriple=amdgcn-- -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: error: <unknown>:0:0: in function invalid_fence void (): Unknown synchronization scope
+define void @invalid_fence() {
+  fence syncscope(6) seq_cst
+  ret void
+}
+
+; CHECK: error: <unknown>:0:0: in function invalid_load void (i32 addrspace(4)*, i32 addrspace(4)*): Unknown synchronization scope
+define void @invalid_load(i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+  %val = load atomic i32, i32 addrspace(4)* %in syncscope(6) seq_cst, align 4
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+; CHECK: error: <unknown>:0:0: in function invalid_store void (i32, i32 addrspace(4)*): Unknown synchronization scope
+define void @invalid_store(i32 %in, i32 addrspace(4)* %out) {
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope(6) seq_cst, align 4
+  ret void
+}
+
+; CHECK: error: <unknown>:0:0: in function invalid_cmpxchg void (i32 addrspace(4)*, i32, i32): Unknown synchronization scope
+define void @invalid_cmpxchg(i32 addrspace(4)* %out, i32 %in, i32 %old) {
+  %gep = getelementptr i32, i32 addrspace(4)* %out, i32 4
+  %val = cmpxchg volatile i32 addrspace(4)* %gep, i32 %old, i32 %in syncscope(6) seq_cst seq_cst
+  ret void
+}
+
+; CHECK: error: <unknown>:0:0: in function invalid_rmw void (i32 addrspace(4)*, i32): Unknown synchronization scope
+define void @invalid_rmw(i32 addrspace(4)* %out, i32 %in) {
+  %val = atomicrmw volatile xchg i32 addrspace(4)* %out, i32 %in syncscope(6) seq_cst
+  ret void
+}