diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -254,6 +254,9 @@ IsaVersion IV; + /// Whether to insert cache invalidation instructions. + bool InsertCacheInv; + SICacheControl(const GCNSubtarget &ST); public: @@ -650,6 +653,7 @@ SICacheControl::SICacheControl(const GCNSubtarget &ST) { TII = ST.getInstrInfo(); IV = getIsaVersion(ST.getCPU()); + InsertCacheInv = !ST.isAmdPalOS(); } /* static */ @@ -714,6 +718,9 @@ SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const { + if (!InsertCacheInv) + return false; + bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); @@ -852,6 +859,9 @@ SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const { + if (!InsertCacheInv) + return false; + bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); @@ -954,6 +964,9 @@ SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const { + if (!InsertCacheInv) + return false; + bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll @@ -1,13 +1,15 @@ -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s -; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s +; RUN: 
llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s ; FUNC-LABEL: {{^}}system_acquire: ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} -; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @system_acquire() { entry: @@ -19,6 +21,7 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN: s_endpgm define amdgpu_kernel void @system_release() { entry: @@ -30,7 +33,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @system_acq_rel() { entry: @@ -42,7 +46,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @system_seq_cst() { entry: @@ -54,7 +59,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} -; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @system_one_as_acquire() { entry: @@ -66,6 +72,7 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN: s_endpgm define amdgpu_kernel void @system_one_as_release() { entry: @@ -77,7 +84,8 @@ ; GCN: %bb.0 ; GCN-NOT: 
ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @system_one_as_acq_rel() { entry: @@ -89,7 +97,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @system_one_as_seq_cst() { entry: @@ -181,7 +190,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} -; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @agent_acquire() { entry: @@ -193,6 +203,7 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN: s_endpgm define amdgpu_kernel void @agent_release() { entry: @@ -204,7 +215,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @agent_acq_rel() { entry: @@ -216,7 +228,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @agent_seq_cst() { entry: @@ -228,7 +241,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} -; GCN-NEXT: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @agent_one_as_acquire() { entry: @@ -240,6 +254,7 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN: s_endpgm define amdgpu_kernel void @agent_one_as_release() { entry: @@ -251,7 +266,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: 
s_waitcnt vmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @agent_one_as_acq_rel() { entry: @@ -263,7 +279,8 @@ ; GCN: %bb.0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_waitcnt vmcnt(0){{$}} -; GCN: buffer_wbinvl1{{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 +; GCN-NOT: buffer_wbinvl1{{$}} ; GCN: s_endpgm define amdgpu_kernel void @agent_one_as_seq_cst() { entry: @@ -273,7 +290,9 @@ ; FUNC-LABEL: {{^}}workgroup_acquire: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_acquire() { @@ -284,7 +303,9 @@ ; FUNC-LABEL: {{^}}workgroup_release: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_release() { @@ -295,7 +316,9 @@ ; FUNC-LABEL: {{^}}workgroup_acq_rel: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_acq_rel() { @@ -307,6 +330,9 @@ ; FUNC-LABEL: {{^}}workgroup_seq_cst: ; GCN: %bb.0 ; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_seq_cst() { @@ -317,7 +343,9 @@ ; FUNC-LABEL: {{^}}workgroup_one_as_acquire: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) +; GCN10: 
s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_one_as_acquire() { @@ -328,7 +356,9 @@ ; FUNC-LABEL: {{^}}workgroup_one_as_release: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_one_as_release() { @@ -339,7 +369,9 @@ ; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_one_as_acq_rel() { @@ -350,7 +382,9 @@ ; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst: ; GCN: %bb.0 -; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN9-NOT: s_waitcnt vmcnt(0){{$}} +; GCN10: s_waitcnt vmcnt(0) +; GCN10: s_waitcnt_vscnt null, 0x0 ; GCN-NOT: ATOMIC_FENCE ; GCN: s_endpgm define amdgpu_kernel void @workgroup_one_as_seq_cst() {