Index: lib/Target/R600/AMDGPUInstructions.td
===================================================================
--- lib/Target/R600/AMDGPUInstructions.td
+++ lib/Target/R600/AMDGPUInstructions.td
@@ -232,16 +232,25 @@
   return isLocalLoad(dyn_cast<LoadSDNode>(N));
 }]>;
 
-def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
-                                    (atomic_load_add node:$ptr, node:$value), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-
-def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
-                                    (atomic_load_sub node:$ptr, node:$value), [{
-  return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+class local_binary_atomic_op<SDNode atomic_op> :
+  PatFrag<(ops node:$ptr, node:$value),
+    (atomic_op node:$ptr, node:$value), [{
+  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
 }]>;
+
+def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
+def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
+def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
+def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
+def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
+def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
+def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
+def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
+def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
+def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
+
 def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                             (AMDGPUstore_mskor node:$val, node:$ptr), [{
   return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
Index: lib/Target/R600/SIInstructions.td
===================================================================
--- lib/Target/R600/SIInstructions.td
+++ lib/Target/R600/SIInstructions.td
@@ -2149,11 +2149,20 @@
 defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
 defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
 
-def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
-           (DS_ADD_U32_RTN 0, $ptr, $val, 0)>;
+multiclass DSAtomicPat<DS inst, ValueType vt, PatFrag frag> {
+  def : Pat <
+    (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
+    (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+  >;
+
+  def : Pat <
+    (frag vt:$src1, i32:$src0),
+    (inst 0, $src0, $src1, 0)
+  >;
+}
 
-def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
-           (DS_SUB_U32_RTN 0, $ptr, $val, 0)>;
+defm : DSAtomicPat<DS_ADD_U32_RTN, i32, atomic_load_add_local>;
+defm : DSAtomicPat<DS_SUB_U32_RTN, i32, atomic_load_sub_local>;
 
 //===----------------------------------------------------------------------===//
 // MUBUF Patterns
Index: test/CodeGen/R600/atomic_load_add.ll
===================================================================
--- test/CodeGen/R600/atomic_load_add.ll
+++ test/CodeGen/R600/atomic_load_add.ll
@@ -1,23 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @atomic_add_local
-; R600-CHECK: LDS_ADD *
-; SI-CHECK-LABEL: @atomic_add_local
-; SI-CHECK: DS_ADD_U32_RTN
+; FUNC-LABEL: @atomic_add_local
+; R600: LDS_ADD *
+; SI: DS_ADD_U32_RTN
 define void @atomic_add_local(i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+  %unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
   ret void
 }
 
-; R600-CHECK-LABEL: @atomic_add_ret_local
-; R600-CHECK: LDS_ADD_RET *
-; SI-CHECK-LABEL: @atomic_add_ret_local
-; SI-CHECK: DS_ADD_U32_RTN
+; FUNC-LABEL: @atomic_add_local_const_offset
+; R600: LDS_ADD *
+; SI: DS_ADD_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 4
+  %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_add_ret_local
+; R600: LDS_ADD_RET *
+; SI: DS_ADD_U32_RTN
 define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
-  store i32 %0, i32 addrspace(1)* %out
+  %val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @atomic_add_ret_local_const_offset
+; R600: LDS_ADD_RET *
+; SI: DS_ADD_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 5
+  %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
   ret void
 }
Index: test/CodeGen/R600/atomic_load_sub.ll
===================================================================
--- test/CodeGen/R600/atomic_load_sub.ll
+++ test/CodeGen/R600/atomic_load_sub.ll
@@ -1,23 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @atomic_sub_local
-; R600-CHECK: LDS_SUB *
-; SI-CHECK-LABEL: @atomic_sub_local
-; SI-CHECK: DS_SUB_U32_RTN
+; FUNC-LABEL: @atomic_sub_local
+; R600: LDS_SUB *
+; SI: DS_SUB_U32_RTN
 define void @atomic_sub_local(i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  %unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
   ret void
 }
 
-; R600-CHECK-LABEL: @atomic_sub_ret_local
-; R600-CHECK: LDS_SUB_RET *
-; SI-CHECK-LABEL: @atomic_sub_ret_local
-; SI-CHECK: DS_SUB_U32_RTN
+; FUNC-LABEL: @atomic_sub_local_const_offset
+; R600: LDS_SUB *
+; SI: DS_SUB_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 4
+  %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_sub_ret_local
+; R600: LDS_SUB_RET *
+; SI: DS_SUB_U32_RTN
 define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
-  store i32 %0, i32 addrspace(1)* %out
+  %val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @atomic_sub_ret_local_const_offset
+; R600: LDS_SUB_RET *
+; SI: DS_SUB_U32_RTN v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
+  %gep = getelementptr i32 addrspace(3)* %local, i32 5
+  %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
+  store i32 %val, i32 addrspace(1)* %out
   ret void
 }