Index: lib/Target/AMDGPU/EvergreenInstructions.td
===================================================================
--- lib/Target/AMDGPU/EvergreenInstructions.td
+++ lib/Target/AMDGPU/EvergreenInstructions.td
@@ -35,28 +35,59 @@
     : EG_CF_RAT <0x57, rat_inst, rat_id, mask, (outs), ins,
                  "MEM_RAT_CACHELESS "#name, pattern>;
 
-class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, dag ins, string name,
-                  list<dag> pattern>
-    : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
+class CF_MEM_RAT <bits<6> rat_inst, bits<4> rat_id, bits<4> mask, dag ins,
+                  dag outs, string name, list<dag> pattern>
+    : EG_CF_RAT <0x56, rat_inst, rat_id, mask, outs, ins,
                  "MEM_RAT "#name, pattern>;
 
 class CF_MEM_RAT_STORE_TYPED <bit has_eop>
-    : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
-                           i32imm:$rat_id, InstFlag:$eop),
+    : CF_MEM_RAT <0x1, ?, 0xf, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
+                           i32imm:$rat_id, InstFlag:$eop), (outs),
                   "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
                   #!if(has_eop, ", $eop", ""),
                   [(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
                                              R600_Reg128:$index_gpr,
                                              (i32 imm:$rat_id))]>;
 
-def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
-                            (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+def RAT_MSKOR : CF_MEM_RAT <0x11, 0, 0xf,
+                            (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), (outs),
                             "MSKOR $rw_gpr.XW, $index_gpr",
                             [(mskor_global v4i32:$rw_gpr, i32:$index_gpr)]
 > {
   let eop = 0;
 }
+
+multiclass RAT_ATOMIC <bits<6> op_ret, bits<6> op_noret, string name> {
+  let Constraints = "$rw_gpr = $out_gpr", eop = 0, mayStore = 1 in {
+    def _RTN: CF_MEM_RAT <op_ret, 0, 0xf,
+                          (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+                          (outs R600_Reg128:$out_gpr),
+                          name # "_RTN" # " $rw_gpr, $index_gpr", [] >;
+    def _NORET: CF_MEM_RAT <op_noret, 0, 0xf,
+                            (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
+                            (outs R600_Reg128:$out_gpr),
+                            name # " $rw_gpr, $index_gpr", [] >;
+  }
+}
+
+// Swap no-ret is just store. Raw store to cached target
+// can only store on dword, which exactly matches swap_no_ret.
+defm RAT_ATOMIC_XCHG_INT : RAT_ATOMIC<1, 34, "ATOMIC_XCHG_INT">;
+defm RAT_ATOMIC_CMPXCHG_INT : RAT_ATOMIC<4, 36, "ATOMIC_CMPXCHG_INT">;
+defm RAT_ATOMIC_ADD : RAT_ATOMIC<7, 39, "ATOMIC_ADD">;
+defm RAT_ATOMIC_SUB : RAT_ATOMIC<8, 40, "ATOMIC_SUB">;
+defm RAT_ATOMIC_RSUB : RAT_ATOMIC<9, 41, "ATOMIC_RSUB">;
+defm RAT_ATOMIC_MIN_INT : RAT_ATOMIC<10, 42, "ATOMIC_MIN_INT">;
+defm RAT_ATOMIC_MIN_UINT : RAT_ATOMIC<11, 43, "ATOMIC_MIN_UINT">;
+defm RAT_ATOMIC_MAX_INT : RAT_ATOMIC<12, 44, "ATOMIC_MAX_INT">;
+defm RAT_ATOMIC_MAX_UINT : RAT_ATOMIC<13, 45, "ATOMIC_MAX_UINT">;
+defm RAT_ATOMIC_AND : RAT_ATOMIC<14, 46, "ATOMIC_AND">;
+defm RAT_ATOMIC_OR : RAT_ATOMIC<15, 47, "ATOMIC_OR">;
+defm RAT_ATOMIC_XOR : RAT_ATOMIC<16, 48, "ATOMIC_XOR">;
+defm RAT_ATOMIC_INC_UINT : RAT_ATOMIC<18, 50, "ATOMIC_INC_UINT">;
+defm RAT_ATOMIC_DEC_UINT : RAT_ATOMIC<19, 51, "ATOMIC_DEC_UINT">;
+
 } // End let Predicates = [isEGorCayman]
 
 //===----------------------------------------------------------------------===//
@@ -257,6 +288,76 @@
 
 let Predicates = [isEGorCayman] in {
 
+multiclass AtomicPat <Instruction inst_ret, Instruction inst_noret,
+                      SDPatternOperator node_ret,
+                      SDPatternOperator node_noret> {
+  // FIXME: Add _RTN version. We need per WI scratch location to store the old value
+  // EXTRACT_SUBREG here is dummy, we know the node has no uses
+  def : Pat<(i32 (node_noret i32:$ptr, i32:$data)),
+            (EXTRACT_SUBREG (inst_noret
+              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>;
+}
+multiclass AtomicIncDecPat <Instruction inst_ret, Instruction inst_noret,
+                            SDPatternOperator node_ret,
+                            SDPatternOperator node_noret, int C> {
+  // FIXME: Add _RTN version. We need per WI scratch location to store the old value
+  // EXTRACT_SUBREG here is dummy, we know the node has no uses
+  def : Pat<(i32 (node_noret i32:$ptr, C)),
+            (EXTRACT_SUBREG (inst_noret
+              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>;
+}
+
+// CMPSWAP is pattern is special
+// EXTRACT_SUBREG here is dummy, we know the node has no uses
+// FIXME: Add _RTN version. We need per WI scratch location to store the old value
+def : Pat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$data)),
+          (EXTRACT_SUBREG (RAT_ATOMIC_CMPXCHG_INT_NORET
+            (INSERT_SUBREG
+              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $cmp, sub3),
+              $data, sub0),
+            $ptr), sub1)>;
+
+defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
+                                RAT_ATOMIC_XCHG_INT_NORET,
+                                atomic_swap_global_ret,
+                                atomic_swap_global_noret>;
+defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
+                               atomic_add_global_ret, atomic_add_global_noret>;
+defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
+                               atomic_sub_global_ret, atomic_sub_global_noret>;
+defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
+                               RAT_ATOMIC_MIN_INT_NORET,
+                               atomic_min_global_ret, atomic_min_global_noret>;
+defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
+                                RAT_ATOMIC_MIN_UINT_NORET,
+                                atomic_umin_global_ret, atomic_umin_global_noret>;
+defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
+                               RAT_ATOMIC_MAX_INT_NORET,
+                               atomic_max_global_ret, atomic_max_global_noret>;
+defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
+                                RAT_ATOMIC_MAX_UINT_NORET,
+                                atomic_umax_global_ret, atomic_umax_global_noret>;
+defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
+                               atomic_and_global_ret, atomic_and_global_noret>;
+defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
+                              atomic_or_global_ret, atomic_or_global_noret>;
+defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
+                               atomic_xor_global_ret, atomic_xor_global_noret>;
+defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
+                                        RAT_ATOMIC_INC_UINT_NORET,
+                                        atomic_add_global_ret,
+                                        atomic_add_global_noret, 1>;
+defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
+                                        RAT_ATOMIC_INC_UINT_NORET,
+                                        atomic_sub_global_ret,
+                                        atomic_sub_global_noret, -1>;
+defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
+                                        RAT_ATOMIC_DEC_UINT_NORET,
+                                        atomic_add_global_ret,
+                                        atomic_add_global_noret, -1>;
+defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
+                                        RAT_ATOMIC_DEC_UINT_NORET,
+                                        atomic_sub_global_ret,
+                                        atomic_sub_global_noret, 1>;
+
 // Should be predicated on FeatureFP64
 // def FMA_64 : R600_3OP <
 //   0xA, "FMA_64",
Index: lib/Target/AMDGPU/R600ISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/R600ISelLowering.cpp
+++ lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -221,6 +221,11 @@
     setOperationAction(ISD::SUBE, VT, Expand);
   }
 
+  // LLVM will expand these to atomic_cmp_swap(0)
+  // and atomic_swap, respectively.
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); + setSchedulingPreference(Sched::Source); setTargetDAGCombine(ISD::FP_ROUND); Index: test/CodeGen/AMDGPU/r600.global_atomics.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/r600.global_atomics.ll @@ -0,0 +1,542 @@ +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; TODO: Add _RTN versions and merge with the GCN test + +; FUNC-LABEL: {{^}}atomic_add_i32_offset: +; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_add_i32_soffset: +; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_add_i32_huge_offset: +; FIXME: looks like the offset is wrong +; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595 + + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, 
i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_add_i32: +; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_add_i32_addr64: +; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_and_i32_offset: +; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_and_i32: +; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_and_i32_addr64: +; EG: 
MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_sub_i32_offset: +; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_sub_i32: +; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_sub_i32_addr64: +; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_max_i32_offset: +; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* 
%out, i64 4 + %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_max_i32: +; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_max_i32_addr64: +; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umax_i32_offset: +; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; 
FUNC-LABEL: {{^}}atomic_umax_i32: +; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umax_i32_addr64: +; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_min_i32_offset: +; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_min_i32: +; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_min_i32_addr64: +; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* 
%out, i64 %index + %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umin_i32_offset: +; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umin_i32: +; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_umin_i32_addr64: +; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_or_i32_offset: +; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_OR 
[[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_or_i32: +; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_or_i32_addr64: +; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xchg_i32: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + 
%val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset: +; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_cmpxchg_i32: +; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) { +entry: + %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64: +; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 
%old, i32 %in seq_cst seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xor_i32_offset: +; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xor_i32: +; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { +entry: + %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_xor_i32_addr64: +; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z +define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_store_i32_offset: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y +define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 + ret void +} + +; FUNC-LABEL: {{^}}atomic_store_i32: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y +define void @atomic_store_i32(i32 %in, i32 
addrspace(1)* %out) { +entry: + store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 + ret void +} + +; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y +define void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 + store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 + ret void +} + +; FUNC-LABEL: {{^}}atomic_store_i32_addr64: +; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] +; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y +define void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) { +entry: + %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index + store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4 + ret void +} + +; FUNC-LABEL: {{^}}atomic_inc_add +; EG: MEM_RAT ATOMIC_INC_UINT +define void @atomic_inc_add(i32 addrspace(1)* %out) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_dec_add +; EG: MEM_RAT ATOMIC_DEC_UINT +define void @atomic_dec_add(i32 addrspace(1)* %out) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_inc_sub +; EG: MEM_RAT ATOMIC_INC_UINT +define void @atomic_inc_sub(i32 addrspace(1)* %out) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst + ret void +} + +; FUNC-LABEL: {{^}}atomic_dec_sub +; EG: MEM_RAT ATOMIC_DEC_UINT +define void @atomic_dec_sub(i32 addrspace(1)* %out) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 + %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst + ret 
void +}