diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -767,6 +767,12 @@ "Hazard when TRANS instructions are closely followed by a use of the result" >; +def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1", + "HasForceStoreSC0SC1", + "true", + "Has SC0 and SC1 on stores" +>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -1229,11 +1235,11 @@ def FeatureISAVersion9_4_0 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [])>; + [FeatureForceStoreSC0SC1])>; def FeatureISAVersion9_4_1 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, - [])>; + [FeatureForceStoreSC0SC1])>; def FeatureISAVersion9_4_2 : FeatureSet< !listconcat(FeatureISAVersion9_4_Common.Features, diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -206,6 +206,7 @@ bool HasMADIntraFwdBug = false; bool HasVOPDInsts = false; bool HasVALUTransUseHazard = false; + bool HasForceStoreSC0SC1 = false; // Dummy feature to use for assembler in tablegen. bool FeatureDisable = false; @@ -1098,6 +1099,8 @@ bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; } + bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; } + bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; } /// Return if operations acting on VGPR tuples require even alignment. diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -351,6 +351,10 @@ /// Virtual destructor to allow derivations to be deleted. 
virtual ~SICacheControl() = default; + virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI, + MachineBasicBlock::iterator &MI) const { + return false; + } }; class SIGfx6CacheControl : public SICacheControl { @@ -509,6 +513,20 @@ bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, Position Pos) const override; + + bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI, + MachineBasicBlock::iterator &MI) const override { + bool Changed = false; + if (ST.hasForceStoreSC0SC1() && + (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH | + SIAtomicAddrSpace::GLOBAL | + SIAtomicAddrSpace::OTHER)) != + SIAtomicAddrSpace::NONE) { + Changed |= enableSC0Bit(MI); + Changed |= enableSC1Bit(MI); + } + return Changed; + } }; class SIGfx10CacheControl : public SIGfx7CacheControl { @@ -2324,9 +2342,10 @@ if (const auto &MOI = MOA.getLoadInfo(MI)) Changed |= expandLoad(*MOI, MI); - else if (const auto &MOI = MOA.getStoreInfo(MI)) + else if (const auto &MOI = MOA.getStoreInfo(MI)) { Changed |= expandStore(*MOI, MI); - else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) + Changed |= CC->tryForceStoreSC0SC1(*MOI, MI); + } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) Changed |= expandAtomicFence(*MOI, MI); else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI); diff --git a/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll b/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,FORCESC0SC1 %s +; RUN: llc -march=amdgcn -mcpu=gfx941 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,FORCESC0SC1 %s +; RUN: llc -march=amdgcn -mcpu=gfx942 -verify-machineinstrs 
-mattr=-force-store-sc0-sc1 < %s | FileCheck --check-prefixes=GCN,NOSC0SC1 %s + +define amdgpu_kernel void @store_global(ptr addrspace(1) %ptr) { +; FORCESC0SC1-LABEL: store_global: +; FORCESC0SC1: ; %bb.0: ; %entry +; FORCESC0SC1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; FORCESC0SC1-NEXT: v_mov_b32_e32 v0, 0 +; FORCESC0SC1-NEXT: v_mov_b32_e32 v1, 1.0 +; FORCESC0SC1-NEXT: s_waitcnt lgkmcnt(0) +; FORCESC0SC1-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; FORCESC0SC1-NEXT: s_endpgm +; +; NOSC0SC1-LABEL: store_global: +; NOSC0SC1: ; %bb.0: ; %entry +; NOSC0SC1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOSC0SC1-NEXT: v_mov_b32_e32 v0, 0 +; NOSC0SC1-NEXT: v_mov_b32_e32 v1, 1.0 +; NOSC0SC1-NEXT: s_waitcnt lgkmcnt(0) +; NOSC0SC1-NEXT: global_store_dword v0, v1, s[0:1] +; NOSC0SC1-NEXT: s_endpgm +entry: + store float 1.000000e+00, ptr addrspace(1) %ptr, align 4 + ret void +} + +define amdgpu_kernel void @store_flat(ptr addrspace(0) %ptr) { +; FORCESC0SC1-LABEL: store_flat: +; FORCESC0SC1: ; %bb.0: ; %entry +; FORCESC0SC1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; FORCESC0SC1-NEXT: v_mov_b32_e32 v2, 1.0 +; FORCESC0SC1-NEXT: s_waitcnt lgkmcnt(0) +; FORCESC0SC1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; FORCESC0SC1-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 +; FORCESC0SC1-NEXT: s_endpgm +; +; NOSC0SC1-LABEL: store_flat: +; NOSC0SC1: ; %bb.0: ; %entry +; NOSC0SC1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOSC0SC1-NEXT: v_mov_b32_e32 v2, 1.0 +; NOSC0SC1-NEXT: s_waitcnt lgkmcnt(0) +; NOSC0SC1-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; NOSC0SC1-NEXT: flat_store_dword v[0:1], v2 +; NOSC0SC1-NEXT: s_endpgm +entry: + store float 1.000000e+00, ptr addrspace(0) %ptr, align 4 + ret void +} + +define amdgpu_kernel void @store_lds(ptr addrspace(3) %ptr) { +; GCN-LABEL: store_lds: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s0, s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v0, 1.0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v1, s0 +; GCN-NEXT: ds_write_b32 v1, v0 +;
GCN-NEXT: s_endpgm +entry: + store float 1.000000e+00, ptr addrspace(3) %ptr, align 4 + ret void +} + +define amdgpu_kernel void @store_scratch(ptr addrspace(5) %ptr) { +; FORCESC0SC1-LABEL: store_scratch: +; FORCESC0SC1: ; %bb.0: ; %entry +; FORCESC0SC1-NEXT: s_load_dword s0, s[0:1], 0x24 +; FORCESC0SC1-NEXT: v_mov_b32_e32 v0, 1.0 +; FORCESC0SC1-NEXT: s_waitcnt lgkmcnt(0) +; FORCESC0SC1-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 +; FORCESC0SC1-NEXT: s_endpgm +; +; NOSC0SC1-LABEL: store_scratch: +; NOSC0SC1: ; %bb.0: ; %entry +; NOSC0SC1-NEXT: s_load_dword s0, s[0:1], 0x24 +; NOSC0SC1-NEXT: v_mov_b32_e32 v0, 1.0 +; NOSC0SC1-NEXT: s_waitcnt lgkmcnt(0) +; NOSC0SC1-NEXT: scratch_store_dword off, v0, s0 +; NOSC0SC1-NEXT: s_endpgm +entry: + store float 1.000000e+00, ptr addrspace(5) %ptr, align 4 + ret void +} + +define amdgpu_ps void @store_buffer(<4 x i32> inreg %rsrc, float %data, i32 %index) { +; FORCESC0SC1-LABEL: store_buffer: +; FORCESC0SC1: ; %bb.0: ; %main_body +; FORCESC0SC1-NEXT: buffer_store_dword v0, v1, s[0:3], 0 idxen sc0 sc1 +; FORCESC0SC1-NEXT: s_endpgm +; +; NOSC0SC1-LABEL: store_buffer: +; NOSC0SC1: ; %bb.0: ; %main_body +; NOSC0SC1-NEXT: buffer_store_dword v0, v1, s[0:3], 0 idxen +; NOSC0SC1-NEXT: s_endpgm +main_body: + call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0) + ret void +} + +define amdgpu_kernel void @store_global_atomic(ptr addrspace(1) %ptr) { +; FORCESC0SC1-LABEL: store_global_atomic: +; FORCESC0SC1: ; %bb.0: ; %entry +; FORCESC0SC1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; FORCESC0SC1-NEXT: v_mov_b32_e32 v0, 0 +; FORCESC0SC1-NEXT: v_mov_b32_e32 v1, 1.0 +; FORCESC0SC1-NEXT: buffer_wbl2 sc1 +; FORCESC0SC1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; FORCESC0SC1-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; FORCESC0SC1-NEXT: s_endpgm +; +; NOSC0SC1-LABEL: store_global_atomic: +; NOSC0SC1: ; %bb.0: ; %entry +; NOSC0SC1-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; NOSC0SC1-NEXT: 
v_mov_b32_e32 v0, 0 +; NOSC0SC1-NEXT: v_mov_b32_e32 v1, 1.0 +; NOSC0SC1-NEXT: buffer_wbl2 sc1 +; NOSC0SC1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; NOSC0SC1-NEXT: global_store_dword v0, v1, s[0:1] sc1 +; NOSC0SC1-NEXT: s_endpgm +entry: + store atomic float 1.000000e+00, ptr addrspace(1) %ptr syncscope("agent-one-as") seq_cst, align 4 + ret void +} + +define amdgpu_kernel void @store_global_atomic_system(ptr addrspace(1) %ptr) { +; GCN-LABEL: store_global_atomic_system: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: v_mov_b32_e32 v1, 1.0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1 +; GCN-NEXT: s_endpgm + store atomic float 1.000000e+00, ptr addrspace(1) %ptr monotonic, align 4 + ret void +} + + +declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,VGPRCD %s -; RUN: llc -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,VGPRCD %s -; RUN: llc -march=amdgcn -mcpu=gfx940 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s -; RUN: llc -march=amdgcn -mcpu=gfx940 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s +; RUN: llc -march=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,VGPRCD %s +; RUN: llc -march=amdgcn -mcpu=gfx942 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope 
--check-prefixes=GCN,GISEL,VGPRCD %s +; RUN: llc -march=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s +; RUN: llc -march=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32) declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64, i64, <16 x i32>, i32, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -99,7 +99,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_unordered_load: @@ -112,7 +112,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_unordered_load: @@ -234,7 +234,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_load: @@ -247,7 +247,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; 
GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_load: @@ -377,7 +377,7 @@ ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acquire_load: @@ -391,7 +391,7 @@ ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_load: @@ -534,7 +534,7 @@ ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_load: @@ -549,7 +549,7 @@ ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_load: @@ -664,7 +664,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_unordered_store: 
@@ -674,7 +674,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_unordered_store: @@ -780,7 +780,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_store: @@ -790,7 +790,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_store: @@ -906,7 +906,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_release_store: @@ -918,7 +918,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_store: @@ -1038,7 +1038,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; 
GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_store: @@ -1050,7 +1050,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_store: @@ -1885,7 +1885,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acquire_ret_atomicrmw: @@ -1898,7 +1898,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_ret_atomicrmw: @@ -2045,7 +2045,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_ret_atomicrmw: @@ -2060,7 +2060,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; 
GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_ret_atomicrmw: @@ -2211,7 +2211,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_ret_atomicrmw: @@ -2226,7 +2226,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_ret_atomicrmw: @@ -4596,7 +4596,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: @@ -4607,7 +4607,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg: @@ -4749,7 +4749,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: @@ -4761,7 +4761,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg: @@ -4909,7 +4909,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_release_monotonic_ret_cmpxchg: @@ -4922,7 +4922,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_monotonic_ret_cmpxchg: @@ -5078,7 +5078,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: @@ -5092,7 +5092,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: 
flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg: @@ -5252,7 +5252,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: @@ -5266,7 +5266,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg: @@ -5416,7 +5416,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: @@ -5428,7 +5428,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg: @@ -5574,7 +5574,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; 
GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: @@ -5586,7 +5586,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_acquire_ret_cmpxchg: @@ -5742,7 +5742,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_release_acquire_ret_cmpxchg: @@ -5756,7 +5756,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_acquire_ret_cmpxchg: @@ -5916,7 +5916,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: @@ -5930,7 +5930,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; 
GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg: @@ -6090,7 +6090,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: @@ -6104,7 +6104,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg: @@ -6264,7 +6264,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: @@ -6278,7 +6278,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg: @@ -6438,7 +6438,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword 
v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: @@ -6452,7 +6452,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg: @@ -6612,7 +6612,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: @@ -6626,7 +6626,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_release_seq_cst_ret_cmpxchg: @@ -6786,7 +6786,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: @@ -6800,7 +6800,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword 
v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg: @@ -6960,7 +6960,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: @@ -6974,7 +6974,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg: @@ -7106,7 +7106,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_unordered_load: @@ -7119,7 +7119,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_unordered_load: @@ -7241,7 +7241,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_load: @@ -7254,7 +7254,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_load: @@ -7390,7 +7390,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_load: @@ -7404,7 +7404,7 @@ ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_load: @@ -7555,7 +7555,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_load: @@ -7570,7 +7570,7 @@ ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_load: @@ -7687,7 +7687,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], 
s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_unordered_store: @@ -7697,7 +7697,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_unordered_store: @@ -7803,7 +7803,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_store: @@ -7813,7 +7813,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_store: @@ -7929,7 +7929,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_store: @@ -7941,7 +7941,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-TGSPLIT-NEXT: 
flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_store: @@ -8061,7 +8061,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_store: @@ -8073,7 +8073,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc1 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_store: @@ -8901,7 +8901,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: @@ -8914,7 +8914,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_ret_atomicrmw: @@ -9068,7 +9068,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: @@ -9083,7 +9083,7 @@ ; 
GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw: @@ -9241,7 +9241,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: @@ -9256,7 +9256,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_swap v2, v[0:1], v2 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw: @@ -11576,7 +11576,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: @@ -11587,7 +11587,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg: @@ -11735,7 +11735,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt 
vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: @@ -11747,7 +11747,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg: @@ -11897,7 +11897,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: @@ -11910,7 +11910,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg: @@ -12072,7 +12072,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -12086,7 +12086,7 @@ ; GFX940-TGSPLIT-NEXT: 
flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -12254,7 +12254,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -12268,7 +12268,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -12426,7 +12426,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: @@ -12438,7 +12438,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg: @@ -12592,7 +12592,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: 
buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: @@ -12604,7 +12604,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg: @@ -12768,7 +12768,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: @@ -12782,7 +12782,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg: @@ -12950,7 +12950,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: @@ -12964,7 +12964,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; 
GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg: @@ -13132,7 +13132,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: @@ -13146,7 +13146,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg: @@ -13314,7 +13314,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -13328,7 +13328,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -13496,7 +13496,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: 
flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: @@ -13510,7 +13510,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg: @@ -13678,7 +13678,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: @@ -13692,7 +13692,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg: @@ -13860,7 +13860,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -13874,7 +13874,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; 
GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -14042,7 +14042,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -14056,7 +14056,7 @@ ; GFX940-TGSPLIT-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll @@ -99,7 +99,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_nontemporal_load_0: @@ -112,7 +112,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_load_0: @@ -245,7 +245,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; 
GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_nontemporal_load_1: @@ -259,7 +259,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_load_1: @@ -389,7 +389,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_nontemporal_store_0: @@ -402,7 +402,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s2 ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s3 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_store_0: @@ -535,7 +535,7 @@ ; GFX940-NOTTGSPLIT-NEXT: flat_load_dword v2, v[2:3] ; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 nt +; GFX940-NOTTGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: flat_nontemporal_store_1: @@ -549,7 +549,7 @@ ; GFX940-TGSPLIT-NEXT: flat_load_dword v2, v[2:3] ; GFX940-TGSPLIT-NEXT: v_lshl_add_u64 v[0:1], s[2:3], 0, v[0:1] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: flat_store_dword 
v[0:1], v2 nt +; GFX940-TGSPLIT-NEXT: flat_store_dword v[0:1], v2 sc0 nt sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: flat_nontemporal_store_1: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -102,7 +102,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_unordered_load: @@ -112,7 +112,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_unordered_load: @@ -234,7 +234,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_monotonic_load: @@ -244,7 +244,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_load: @@ -375,7 +375,7 @@ ; GFX940-NOTTGSPLIT-NEXT: 
global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acquire_load: @@ -386,7 +386,7 @@ ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_load: @@ -526,7 +526,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_load: @@ -537,7 +537,7 @@ ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_load: @@ -657,7 +657,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_unordered_store: @@ -667,7 +667,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; 
GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_unordered_store: @@ -780,7 +780,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_monotonic_store: @@ -790,7 +790,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_store: @@ -914,7 +914,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_release_store: @@ -926,7 +926,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_store: @@ -1054,7 +1054,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 
sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_store: @@ -1066,7 +1066,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_store: @@ -1943,7 +1943,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw: @@ -1956,7 +1956,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_ret_atomicrmw: @@ -2114,7 +2114,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw: @@ -2129,7 +2129,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: 
global_agent_acq_rel_ret_atomicrmw: @@ -2291,7 +2291,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw: @@ -2306,7 +2306,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw: @@ -4767,7 +4767,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: @@ -4778,7 +4778,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg: @@ -4924,7 +4924,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] 
sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: @@ -4936,7 +4936,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: @@ -5088,7 +5088,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg: @@ -5101,7 +5101,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg: @@ -5262,7 +5262,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: @@ -5276,7 +5276,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; 
GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: @@ -5441,7 +5441,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: @@ -5455,7 +5455,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: @@ -5609,7 +5609,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: @@ -5621,7 +5621,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg: @@ -5771,7 +5771,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; 
GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg: @@ -5783,7 +5783,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg: @@ -5944,7 +5944,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg: @@ -5958,7 +5958,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg: @@ -6123,7 +6123,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: @@ -6137,7 +6137,7 @@ ; 
GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: @@ -6302,7 +6302,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: @@ -6316,7 +6316,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: @@ -6481,7 +6481,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg: @@ -6495,7 +6495,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: 
global_agent_monotonic_seq_cst_ret_cmpxchg: @@ -6660,7 +6660,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: @@ -6674,7 +6674,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg: @@ -6839,7 +6839,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg: @@ -6853,7 +6853,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg: @@ -7018,7 +7018,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: 
global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: @@ -7032,7 +7032,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg: @@ -7197,7 +7197,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: @@ -7211,7 +7211,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: @@ -7345,7 +7345,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_load: @@ -7355,7 +7355,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] 
+; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_unordered_load: @@ -7477,7 +7477,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_load: @@ -7487,7 +7487,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_load: @@ -7618,7 +7618,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_load: @@ -7629,7 +7629,7 @@ ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_load: @@ -7769,7 +7769,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, 
s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load: @@ -7780,7 +7780,7 @@ ; GFX940-TGSPLIT-NEXT: global_load_dword v1, v0, s[0:1] sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_load: @@ -7900,7 +7900,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_store: @@ -7910,7 +7910,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_unordered_store: @@ -8023,7 +8023,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_store: @@ -8033,7 +8033,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, 0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: 
global_agent_one_as_monotonic_store: @@ -8157,7 +8157,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_store: @@ -8169,7 +8169,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_store: @@ -8297,7 +8297,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-NOTTGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store: @@ -8309,7 +8309,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s4 ; GFX940-TGSPLIT-NEXT: buffer_wbl2 sc1 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc1 +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_store: @@ -9186,7 +9186,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw: @@ -9199,7 +9199,7 @@ ; GFX940-TGSPLIT-NEXT: 
global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw: @@ -9357,7 +9357,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: @@ -9372,7 +9372,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: @@ -9534,7 +9534,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: @@ -9549,7 +9549,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_swap v1, v0, v1, s[2:3] sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: @@ -12010,7 +12010,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_mov_b64_e32 
v[0:1], s[2:3] ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: @@ -12021,7 +12021,7 @@ ; GFX940-TGSPLIT-NEXT: v_mov_b64_e32 v[0:1], s[2:3] ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg: @@ -12167,7 +12167,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: @@ -12179,7 +12179,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: @@ -12340,7 +12340,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; 
GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -12354,7 +12354,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -12519,7 +12519,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -12533,7 +12533,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -12687,7 +12687,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: @@ -12699,7 +12699,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 
-; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg: @@ -12849,7 +12849,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: @@ -12861,7 +12861,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: @@ -13022,7 +13022,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: @@ -13036,7 +13036,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: @@ -13201,7 +13201,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, 
v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: @@ -13215,7 +13215,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: @@ -13380,7 +13380,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: @@ -13394,7 +13394,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: @@ -13559,7 +13559,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; 
GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -13573,7 +13573,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg: @@ -13738,7 +13738,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: @@ -13752,7 +13752,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg: @@ -13917,7 +13917,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: @@ -13931,7 +13931,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword 
v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg: @@ -14096,7 +14096,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -14110,7 +14110,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg: @@ -14275,7 +14275,7 @@ ; GFX940-NOTTGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: buffer_inv sc1 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: @@ -14289,7 +14289,7 @@ ; GFX940-TGSPLIT-NEXT: global_atomic_cmpswap v0, v2, v[0:1], s[0:1] offset:16 sc0 ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) ; GFX940-TGSPLIT-NEXT: buffer_inv sc1 -; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] +; GFX940-TGSPLIT-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll 
b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll @@ -104,7 +104,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_nontemporal_load_0: @@ -115,7 +115,7 @@ ; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_load_0: @@ -248,7 +248,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: global_load_dword v0, v0, s[0:1] nt ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_nontemporal_load_1: @@ -259,7 +259,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: global_load_dword v0, v0, s[0:1] nt ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_load_1: @@ -385,7 +385,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] nt +; GFX940-NOTTGSPLIT-NEXT: global_store_dword 
v0, v1, s[2:3] sc0 nt sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_nontemporal_store_0: @@ -396,7 +396,7 @@ ; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] nt +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 nt sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_store_0: @@ -524,7 +524,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] nt +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 nt sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: global_nontemporal_store_1: @@ -535,7 +535,7 @@ ; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] nt +; GFX940-TGSPLIT-NEXT: global_store_dword v0, v1, s[2:3] sc0 nt sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: global_nontemporal_store_1: diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -138,7 +138,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v0, off, s4 nt ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: private_nontemporal_load_0: @@ -149,7 +149,7 @@ ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX940-TGSPLIT-NEXT: scratch_load_dword v0, off, s4 nt ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_load_0: @@ -315,7 +315,7 @@ ; GFX940-NOTTGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX940-NOTTGSPLIT-NEXT: scratch_load_dword v0, v0, off nt ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] +; GFX940-NOTTGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: private_nontemporal_load_1: @@ -327,7 +327,7 @@ ; GFX940-TGSPLIT-NEXT: v_lshl_add_u32 v0, v0, 2, s4 ; GFX940-TGSPLIT-NEXT: scratch_load_dword v0, v0, off nt ; GFX940-TGSPLIT-NEXT: s_waitcnt vmcnt(0) -; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] +; GFX940-TGSPLIT-NEXT: global_store_dword v1, v0, s[2:3] sc0 sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_load_1: @@ -491,7 +491,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword off, v0, s4 nt +; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword off, v0, s4 sc0 nt sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: private_nontemporal_store_0: @@ -502,7 +502,7 @@ ; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v0, s0 -; GFX940-TGSPLIT-NEXT: scratch_store_dword off, v0, s4 nt +; GFX940-TGSPLIT-NEXT: scratch_store_dword off, v0, s4 sc0 nt sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_store_0: @@ -666,7 +666,7 @@ ; GFX940-NOTTGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX940-NOTTGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NOTTGSPLIT-NEXT: v_mov_b32_e32 
v1, s0 -; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword v0, v1, off nt +; GFX940-NOTTGSPLIT-NEXT: scratch_store_dword v0, v1, off sc0 nt sc1 ; GFX940-NOTTGSPLIT-NEXT: s_endpgm ; ; GFX940-TGSPLIT-LABEL: private_nontemporal_store_1: @@ -678,7 +678,7 @@ ; GFX940-TGSPLIT-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX940-TGSPLIT-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-TGSPLIT-NEXT: v_mov_b32_e32 v1, s0 -; GFX940-TGSPLIT-NEXT: scratch_store_dword v0, v1, off nt +; GFX940-TGSPLIT-NEXT: scratch_store_dword v0, v1, off sc0 nt sc1 ; GFX940-TGSPLIT-NEXT: s_endpgm ; ; GFX11-WGP-LABEL: private_nontemporal_store_1: