Patch summary (free text ahead of the first diff header):

  * SIMemoryLegalizer.cpp: adds a hidden boolean command-line option,
    -amdgcn-skip-cache-invalidations (default: false). The SICacheControl
    constructor now sets InsertCacheInv to the negation of this option
    instead of to !ST.isAmdPalOS().
  * memory-legalizer-amdpal.ll: the existing amdpal RUN lines now expect the
    cache invalidating instructions (CACHE_INV: buffer_wbinvl1; CACHE_INV10 on
    gfx1010: buffer_gl0_inv and buffer_gl1_inv). Two new RUN lines pass the
    option on gfx900 and gfx1010 and check that none of them is emitted
    (SKIP_CACHE_INV-NOT).

diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -47,6 +47,10 @@
 #define DEBUG_TYPE "si-memory-legalizer"
 #define PASS_NAME "SI Memory Legalizer"
 
+static cl::opt<bool> AmdgcnSkipCacheInvalidations(
+    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
+    cl::desc("Use this to skip inserting cache invalidating instructions."));
+
 namespace {
 
 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
@@ -254,7 +258,7 @@
 
   IsaVersion IV;
 
-  /// Whether to insert cache invalidation instructions.
+  /// Whether to insert cache invalidating instructions.
   bool InsertCacheInv;
 
   SICacheControl(const GCNSubtarget &ST);
@@ -653,7 +657,7 @@
 SICacheControl::SICacheControl(const GCNSubtarget &ST) {
   TII = ST.getInstrInfo();
   IV = getIsaVersion(ST.getCPU());
-  InsertCacheInv = !ST.isAmdPalOS();
+  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
 }
 
 /* static */
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll
@@ -1,15 +1,23 @@
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
-; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10 %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10,CACHE_INV10 %s
+
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN9,SKIP_CACHE_INV %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -amdgcn-skip-cache-invalidations -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GCN10,SKIP_CACHE_INV %s
+
 
 ; FUNC-LABEL: {{^}}system_acquire:
 ; GCN: %bb.0
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @system_acquire() {
 entry:
@@ -34,7 +42,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @system_acq_rel() {
 entry:
@@ -47,7 +59,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @system_seq_cst() {
 entry:
@@ -60,7 +76,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @system_one_as_acquire() {
 entry:
@@ -85,7 +105,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @system_one_as_acq_rel() {
 entry:
@@ -98,7 +122,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @system_one_as_seq_cst() {
 entry:
@@ -191,7 +219,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @agent_acquire() {
 entry:
@@ -216,7 +248,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @agent_acq_rel() {
 entry:
@@ -229,7 +265,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @agent_seq_cst() {
 entry:
@@ -242,7 +282,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @agent_one_as_acquire() {
 entry:
@@ -267,7 +311,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @agent_one_as_acq_rel() {
 entry:
@@ -280,7 +328,11 @@
 ; GCN-NOT: ATOMIC_FENCE
 ; GCN: s_waitcnt vmcnt(0){{$}}
 ; GCN10: s_waitcnt_vscnt null, 0x0
-; GCN-NOT: buffer_wbinvl1{{$}}
+; CACHE_INV: buffer_wbinvl1{{$}}
+; CACHE_INV10: buffer_gl0_inv
+; CACHE_INV10: buffer_gl1_inv
+; SKIP_CACHE_INV-NOT: buffer_wbinvl1{{$}}
+; SKIP_CACHE_INV-NOT: buffer_gl
 ; GCN: s_endpgm
 define amdgpu_kernel void @agent_one_as_seq_cst() {
 entry: