diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -422,6 +422,16 @@
   let MemoryVT = i16;
 }
 
+def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
 def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
   let IsAtomic = 1;
   let MemoryVT = i32;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -714,6 +714,10 @@
 defm : DSReadPat_mc ;
 }
 
+defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
 defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
 defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
 
@@ -774,6 +778,10 @@
 defm : DSWritePat_mc ;
 }
 
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
 defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
 defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -333,6 +333,18 @@
   let IsNonExtLoad = 1;
 }
 
+def atomic_load_8_glue : PatFrag<(ops node:$ptr),
+  (AMDGPUatomic_ld_glue node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+
+def atomic_load_16_glue : PatFrag<(ops node:$ptr),
+  (AMDGPUatomic_ld_glue node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
+
 def atomic_load_32_glue : PatFrag<(ops node:$ptr),
   (AMDGPUatomic_ld_glue node:$ptr)> {
   let IsAtomic = 1;
@@ -423,6 +435,14 @@
 } // End IsLoad = 1
 
 let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
+                                     (atomic_load_8_glue node:$ptr)> {
+  let MemoryVT = i8;
+}
+def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
+                                      (atomic_load_16_glue node:$ptr)> {
+  let MemoryVT = i16;
+}
 def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                       (atomic_load_32_glue node:$ptr)> {
   let MemoryVT = i32;
@@ -509,6 +529,18 @@
 
 let AddressSpaces = StoreAddress_local.AddrSpaces in {
 
+def atomic_store_local_8_m0 : PatFrag <
+  (ops node:$value, node:$ptr),
+  (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i8;
+}
+def atomic_store_local_16_m0 : PatFrag <
+  (ops node:$value, node:$ptr),
+  (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+  let IsAtomic = 1;
+  let MemoryVT = i16;
+}
 def atomic_store_local_32_m0 : PatFrag <
   (ops node:$value, node:$ptr),
   (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll
copy from llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
copy to llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll
@@ -1,5 +1,55 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -global-isel -global-isel-abort=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
+  %load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
+  ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+  %load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
+  ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
+  %load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
+  ret i16 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+  %load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
+  ret i16 %load
+}
 
 ; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
 ; GCN: s_waitcnt
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll
@@ -0,0 +1,103 @@
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
+  store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
+  %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+  store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
+  store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
+  %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+  store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) {
+  store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) {
+  %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+  store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) {
+  store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) {
+  %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+  store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -1,6 +1,56 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) {
+  %load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1
+  ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+  %load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1
+  ret i8 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) {
+  %load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2
+  ret i16 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+  %load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2
+  ret i16 %load
+}
+
 ; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
 ; GCN: s_waitcnt
 ; GFX9-NOT: s_mov_b32 m0
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
--- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -1,6 +1,56 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
+; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) {
+  store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) {
+  %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16
+  store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) {
+  store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) {
+  %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16
+  store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
 ; GCN: s_waitcnt
 ; GFX9-NOT: s_mov_b32 m0