Index: llvm/lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -783,6 +783,11 @@
   (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;
 
+class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
+  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
+>;
+
 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                            ValueType data_vt = vt> : GCNPat <
   (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
@@ -971,8 +976,8 @@
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
 def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global_64, i64>;
 
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global_noret, f32>;
-def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
+def : FlatSignedAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32,    atomic_fadd_global_noret, f32>;
+def : FlatSignedAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>;
 
 } // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
 
Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
+++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
@@ -54,6 +54,15 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
+; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
+main_body:
+  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
+  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
 ; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off
 define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
@@ -71,6 +80,15 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
+; GCN: global_atomic_pk_add_f16 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
+define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+main_body:
+  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
+  call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  ret void
+}
+
 ; Make sure this artificially selects with an incorrect subtarget, but
 ; the feature set.
 ; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
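
Note: flat_* instructions take only an unsigned immediate offset, while the
global_* instructions take a signed one (13-bit signed versus 12-bit unsigned
on GFX9), which is what the FLATOffsetSigned complex pattern models. Because
the no-return fadd patterns previously matched addresses through the unsigned
FLATOffset helper, a negative displacement could not be folded into the
instruction's immediate and was left in the address computation instead. A
minimal standalone reproducer is sketched below; the llc invocation and the
kernel name @repro are illustrative rather than taken from the test file
(gfx908 is assumed, since global_atomic_add_f32 requires the atomic-fadd
feature), so adjust them to the test's actual RUN lines:

; Assumed invocation, not the test's verbatim RUN line:
;   llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < repro.ll

declare void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)*, float)

; Before this patch the -4 byte displacement stayed in the address
; computation (offset:0 plus separate VALU address math); with the
; signed-offset pattern it folds into the immediate:
;   global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off offset:-4
define amdgpu_kernel void @repro(float addrspace(1)* %ptr, float %data) {
  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
  call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %p, float %data)
  ret void
}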