Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -2572,17 +2572,10 @@
     (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
 >;
 
-multiclass DSAtomicRetPat {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
-    (inst (i1 0), $ptr, $value, (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, vt:$val),
-    (inst 0, $ptr, $val, 0)
-  >;
-}
+class DSAtomicRetPat : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+  (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+>;
 
 // Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
 //
@@ -2594,69 +2587,56 @@
 // We also load this -1 with s_mov_b32 / s_mov_b64 even though this
 // needs to be a VGPR. The SGPR copy pass will fix this, and it's
 // easier since there is no v_mov_b64.
-multiclass DSAtomicIncRetPat {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
-    (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
-  >;
-
-  def : Pat <
-    (frag i32:$ptr, (vt 1)),
-    (inst 0, $ptr, (LoadImm (vt -1)), 0)
-  >;
-}
+class DSAtomicIncRetPat : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
+  (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+>;
 
-multiclass DSAtomicCmpXChg {
-  def : Pat <
-    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
-    (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
-  >;
-  def : Pat <
-    (frag i32:$ptr, vt:$cmp, vt:$swap),
-    (inst 0, $ptr, $cmp, $swap, 0)
-  >;
-}
+class DSAtomicCmpXChg : Pat <
+  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+  (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+>;
 
 
 // 32-bit atomics.
-defm : DSAtomicIncRetPat;
-defm : DSAtomicIncRetPat;
-
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-
-defm : DSAtomicCmpXChg;
+def : DSAtomicIncRetPat;
+def : DSAtomicIncRetPat;
+
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+
+def : DSAtomicCmpXChg;
 
 // 64-bit atomics.
-defm : DSAtomicIncRetPat;
-defm : DSAtomicIncRetPat;
-
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-defm : DSAtomicRetPat;
-
-defm : DSAtomicCmpXChg;
+def : DSAtomicIncRetPat;
+def : DSAtomicIncRetPat;
+
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+def : DSAtomicRetPat;
+
+def : DSAtomicCmpXChg;
 
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/R600/atomic_cmp_swap_local.ll
===================================================================
--- test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
 ; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
@@ -35,3 +36,17 @@
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_bad_si_offset
+; SI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_CMPST_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
Index: test/CodeGen/R600/local-atomics.ll
===================================================================
--- test/CodeGen/R600/local-atomics.ll
+++ test/CodeGen/R600/local-atomics.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
 ; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
 ; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
@@ -47,6 +48,19 @@
   ret void
 }
 
+; FUNC-LABEL: @lds_atomic_add_ret_i32_bad_si_offset
+; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @lds_atomic_inc_ret_i32:
 ; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
 ; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
@@ -70,6 +84,19 @@
   ret void
 }
 
+; FUNC-LABEL: @lds_atomic_inc_ret_i32_bad_si_offset:
+; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x0
+; CI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: S_ENDPGM
+define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: @lds_atomic_sub_ret_i32:
 ; SI: DS_SUB_RTN_U32
 ; SI: S_ENDPGM
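
Note on the TableGen hunks above: the argument lists that originally sat between angle brackets (the class parameters and the per-instruction def/defm arguments) are not visible in this copy of the patch, so those lines appear without their parameters. Purely as a hedged sketch, with the parameter names inferred from the pattern bodies and DS_ADD_RTN_U32 / atomic_load_add_local used as an illustrative instruction/fragment pair (DS_ADD_RTN_U32 is the opcode checked by the new local-atomics.ll test; the parameter types, the fragment name, and the complex-pattern declaration are assumptions), the rewritten pattern class and one instantiation would look roughly like this:

// Sketch only, not part of the patch. Parameter types and the PatFrag name
// are assumptions inferred from the surrounding code.
// The complex pattern is presumably declared elsewhere in the backend as
// something like:
//   def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
// i.e. it selects a (base pointer, 16-bit offset) pair from a single address.
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
  (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
  (inst (i1 0), $ptr, $value, (as_i16imm $offset))
>;

// One representative instantiation of the rewritten pattern.
def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;

The substance of the change: the old multiclasses matched (add i32:$ptr, (i32 IMM16bit:$offset)) directly in TableGen, so any 16-bit immediate add was folded into the DS offset field regardless of subtarget. Routing the address through the DS1Addr1Offset complex pattern moves that decision into the instruction-selection code, which can decline the fold where it is not valid, which is what the new *_bad_si_offset tests exercise (offset 0x0 on SI, 0x10 on bonaire/CI).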