Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -81,16 +81,16 @@
   static bool checkType(const Value *ptr, unsigned int addrspace);
   static bool checkPrivateAddress(const MachineMemOperand *Op);
 
-  static bool isGlobalStore(const StoreSDNode *N);
-  static bool isFlatStore(const StoreSDNode *N);
+  static bool isGlobalStore(const MemSDNode *N);
+  static bool isFlatStore(const MemSDNode *N);
   static bool isPrivateStore(const StoreSDNode *N);
   static bool isLocalStore(const StoreSDNode *N);
   static bool isRegionStore(const StoreSDNode *N);
 
   bool isCPLoad(const LoadSDNode *N) const;
-  bool isConstantLoad(const LoadSDNode *N, int cbID) const;
-  bool isGlobalLoad(const LoadSDNode *N) const;
-  bool isFlatLoad(const LoadSDNode *N) const;
+  bool isConstantLoad(const MemSDNode *N, int cbID) const;
+  bool isGlobalLoad(const MemSDNode *N) const;
+  bool isFlatLoad(const MemSDNode *N) const;
   bool isParamLoad(const LoadSDNode *N) const;
   bool isPrivateLoad(const LoadSDNode *N) const;
   bool isLocalLoad(const LoadSDNode *N) const;
@@ -128,6 +128,8 @@
                          SDValue &TFE) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset, SDValue &GLC) const;
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset) const;
 
   void SelectMUBUFConstant(SDValue Constant, SDValue &SOffset,
                            SDValue &ImmOffset) const;
@@ -558,7 +560,9 @@
   return false;
 }
 
-bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+bool AMDGPUDAGToDAGISel::isGlobalStore(const MemSDNode *N) {
+  if (!N->writeMem())
+    return false;
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
 }
 
@@ -573,7 +577,9 @@
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 }
 
-bool AMDGPUDAGToDAGISel::isFlatStore(const StoreSDNode *N) {
+bool AMDGPUDAGToDAGISel::isFlatStore(const MemSDNode *N) {
+  if (!N->writeMem())
+    return false;
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
 }
 
@@ -581,7 +587,9 @@
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
 }
 
-bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
+bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+  if (!N->readMem())
+    return false;
   const Value *MemVal = N->getMemOperand()->getValue();
   if (CbId == -1)
     return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
@@ -589,7 +597,9 @@
   return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
 }
 
-bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
+bool AMDGPUDAGToDAGISel::isGlobalLoad(const MemSDNode *N) const {
+  if (!N->readMem())
+    return false;
   if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
     if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
         N->getMemoryVT().bitsLT(MVT::i32))
@@ -606,7 +616,9 @@
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
 }
 
-bool AMDGPUDAGToDAGISel::isFlatLoad(const LoadSDNode *N) const {
+bool AMDGPUDAGToDAGISel::isFlatLoad(const MemSDNode *N) const {
+  if (!N->readMem())
+    return false;
   return checkType(N->getMemOperand()->getValue(), AMDGPUAS::FLAT_ADDRESS);
 }
 
@@ -955,8 +967,10 @@
   SDLoc DL(Addr);
 
-  GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
-  SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  if (!GLC.getNode())
+    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
+  if (!SLC.getNode())
+    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
   TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
 
@@ -1113,6 +1127,13 @@
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &Soffset, SDValue &Offset
+                                           ) const {
+  SDValue GLC, SLC, TFE;
+
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                            SDValue &Soffset, SDValue &Offset,
                                            SDValue &GLC) const {
   SDValue SLC, TFE;
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -192,6 +192,11 @@
   return isGlobalStore(dyn_cast<StoreSDNode>(N));
 }]>;
 
+def global_store_atomic : PatFrag<(ops node:$val, node:$ptr),
+                                  (atomic_store node:$val, node:$ptr), [{
+  return isGlobalStore(dyn_cast<MemSDNode>(N));
+}]>;
+
 // Global address space loads
 def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return isGlobalLoad(dyn_cast<LoadSDNode>(N));
Index: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
@@ -289,6 +289,11 @@
   (inst $addr, 0, 0, 0)
 >;
 
+class FlatLoadAtomicPat : Pat <
+  (vt (node i64:$addr)),
+  (inst $addr, 1, 0, 0)
+>;
+
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 def : FlatLoadPat ;
@@ -297,17 +302,30 @@
 def : FlatLoadPat ;
 def : FlatLoadPat ;
 
+def : FlatLoadAtomicPat ;
+def : FlatLoadAtomicPat ;
+
+
 class FlatStorePat : Pat <
   (node vt:$data, i64:$addr),
   (inst $addr, $data, 0, 0, 0)
 >;
 
+class FlatStoreAtomicPat : Pat <
+  // atomic store follows atomic binop convention so the address comes first
+  (node i64:$addr, vt:$data),
+  (inst $addr, $data, 1, 0, 0)
+>;
+
 def : FlatStorePat ;
 def : FlatStorePat ;
 def : FlatStorePat ;
 def : FlatStorePat ;
 def : FlatStorePat ;
 
+def : FlatStoreAtomicPat ;
+def : FlatStoreAtomicPat ;
+
 class FlatAtomicPat : Pat <
   (vt (node i64:$addr, data_vt:$data)),
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -140,12 +140,13 @@
 class flat_ld : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
-  return isFlatLoad(dyn_cast<LoadSDNode>(N)) ||
-         isGlobalLoad(dyn_cast<LoadSDNode>(N)) ||
-         isConstantLoad(cast<LoadSDNode>(N), -1);
+  return isFlatLoad(dyn_cast<MemSDNode>(N)) ||
+         isGlobalLoad(dyn_cast<MemSDNode>(N)) ||
+         isConstantLoad(cast<MemSDNode>(N), -1);
 }]>;
 
 def flat_load : flat_ld ;
+def atomic_flat_load : flat_ld;
 def flat_az_extloadi8 : flat_ld ;
 def flat_sextloadi8 : flat_ld ;
 def flat_az_extloadi16 : flat_ld ;
@@ -153,11 +154,12 @@
 class flat_st : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{
-  return isFlatStore(dyn_cast<StoreSDNode>(N)) ||
-         isGlobalStore(dyn_cast<StoreSDNode>(N));
+  return isFlatStore(dyn_cast<MemSDNode>(N)) ||
+         isGlobalStore(dyn_cast<MemSDNode>(N));
 }]>;
 
 def flat_store: flat_st ;
+def atomic_flat_store: flat_st ;
 def flat_truncstorei8 : flat_st ;
 def flat_truncstorei16 : flat_st ;
@@ -167,6 +169,12 @@
          isConstantLoad(cast<LoadSDNode>(N), -1);
 }]>;
 
+def mubuf_load_atomic : PatFrag <(ops node:$ptr), (atomic_load node:$ptr), [{
+  return isGlobalLoad(cast<MemSDNode>(N)) ||
+         isConstantLoad(cast<MemSDNode>(N), -1);
+}]>;
+
+
 def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
   return isConstantLoad(cast<LoadSDNode>(N), -1) &&
          static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
@@ -721,6 +729,7 @@
 def MUBUFAddr64Atomic : ComplexPattern;
 def MUBUFScratch : ComplexPattern;
 def MUBUFOffset : ComplexPattern;
+def MUBUFOffsetNoGLC : ComplexPattern;
 def MUBUFOffsetAtomic : ComplexPattern;
 def MUBUFIntrinsicOffset : ComplexPattern;
 def MUBUFIntrinsicVOffset : ComplexPattern;
Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
@@ -3102,20 +3102,35 @@
 // MUBUF Patterns
 //===----------------------------------------------------------------------===//
 
-multiclass MUBUFLoad_Pattern {
-  def : Pat <
+class MUBUFLoad_Pattern : Pat <
     (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
                                   i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
   >;
+
+multiclass MUBUFLoad_Atomic_Pattern {
+  def : Pat <
+    (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+                                i16:$offset, i1:$slc))),
+    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+  >;
+
+  def : Pat <
+    (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+  >;
 }
 
 let Predicates = [isSICI] in {
-defm : MUBUFLoad_Pattern ;
-defm : MUBUFLoad_Pattern ;
-defm : MUBUFLoad_Pattern ;
-defm : MUBUFLoad_Pattern ;
+def : MUBUFLoad_Pattern ;
+def : MUBUFLoad_Pattern ;
+def : MUBUFLoad_Pattern ;
+def : MUBUFLoad_Pattern ;
+
+defm : MUBUFLoad_Atomic_Pattern ;
+defm : MUBUFLoad_Atomic_Pattern ;
 } // End Predicates = [isSICI]
 
 class MUBUFScratchLoadPat : Pat <
@@ -3176,6 +3191,25 @@
 defm : MUBUF_Load_Dword ;
 
+multiclass MUBUFStore_Atomic_Pattern {
+  // Store follows atomic op convention so address is first
+  def : Pat <
+    (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+                            i16:$offset, i1:$slc), vt:$val),
+    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+  >;
+
+  def : Pat <
+    (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+  >;
+}
+let Predicates = [isSICI] in {
+defm : MUBUFStore_Atomic_Pattern ;
+defm : MUBUFStore_Atomic_Pattern ;
+} // End Predicates = [isSICI]
+
 class MUBUFScratchStorePat : Pat <
   (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
                                u16imm:$offset)),
Index: llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/global_atomics.ll
@@ -930,3 +930,181 @@
   store i32 %0, i32 addrspace(1)* %out2
   ret void
 }
+
+; ATOMIC_LOAD
+; FUNC-LABEL: {{^}}atomic_load_i32_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i32, i32 addrspace(1)* %in, i32 4
+  %0 = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32:
+; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
+entry:
+  %0 = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32_addr64_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
+  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+  %0 = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i32_addr64:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dword [[RET]]
+define void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
+  %0 = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_offset:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4
+  %0 = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
+entry:
+  %0 = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_addr64_offset:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
+  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
+  %0 = load atomic i64, i64 addrspace(1)* %gep seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_load_i64_addr64:
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: buffer_store_dwordx2 [[RET]]
+define void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index
+  %0 = load atomic i64, i64 addrspace(1)* %ptr seq_cst, align 8
+  store i64 %0, i64 addrspace(1)* %out
+  ret void
+}
+
+; ATOMIC_STORE
+; FUNC-LABEL: {{^}}atomic_store_i32_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
+  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32:
+; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
+entry:
+  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
+  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i32_addr64:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
+define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
+  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_offset:
+; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
+entry:
+  %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
+  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
+define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
+entry:
+  store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_addr64_offset:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i64 4
+  store atomic i64 %in, i64 addrspace(1)* %gep seq_cst, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_store_i64_addr64:
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}}
+define void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index
+  store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
+  ret void
+}
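
Usage sketch (not part of the patch above): the new patterns cover seq_cst atomic loads and stores of i32/i64 in the global address space, selecting to MUBUF instructions with glc set on SI/CI and to flat instructions on VI. A minimal kernel that exercises both the load and the store path in one function, mirroring the tests added above, could look like this; the function and value names here are illustrative only.

; Hypothetical example, not from the commit: one atomic load feeding one
; atomic store, so both new selection paths fire when compiled for amdgcn.
define void @atomic_copy_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store atomic i32 %val, i32 addrspace(1)* %out seq_cst, align 4
  ret void
}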