Index: llvm/docs/AMDGPUUsage.rst =================================================================== --- llvm/docs/AMDGPUUsage.rst +++ llvm/docs/AMDGPUUsage.rst @@ -102,7 +102,7 @@ Use the Clang options ``-mcpu=`` or ``--offload-arch=`` to specify the AMDGPU processor together with optional target features. See :ref:`amdgpu-target-id` and :ref:`amdgpu-target-features` for AMD GPU target -specific information. +specific information. Every processor is supported for every ABI (amdhsa, amdpal, mesa3d). .. table:: AMDGPU Processors :name: amdgpu-processor-table Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -50,15 +50,16 @@ class AMDGPUSubtarget { public: enum Generation { - R600 = 0, - R700 = 1, - EVERGREEN = 2, - NORTHERN_ISLANDS = 3, - SOUTHERN_ISLANDS = 4, - SEA_ISLANDS = 5, - VOLCANIC_ISLANDS = 6, - GFX9 = 7, - GFX10 = 8 + INVALID = 0, + R600 = 1, + R700 = 2, + EVERGREEN = 3, + NORTHERN_ISLANDS = 4, + SOUTHERN_ISLANDS = 5, + SEA_ISLANDS = 6, + VOLCANIC_ISLANDS = 7, + GFX9 = 8, + GFX10 = 9 }; private: Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -107,6 +107,14 @@ FullFS += FS; ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS); + + // Implement the "generic" processors, which act as the default when no + // generation features are enabled (e.g. for -mcpu=''). There are two + // variants: one for hsa and one for everything else. + if (Gen == AMDGPUSubtarget::INVALID) { + Gen = TT.getOS() == Triple::AMDHSA ? AMDGPUSubtarget::SEA_ISLANDS + : AMDGPUSubtarget::SOUTHERN_ISLANDS; + } // We don't support FP64 for EG/NI atm. 
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)); @@ -118,6 +126,11 @@ FlatForGlobal = true; } + // Use MUBUF instructions for global address space access in GFX60x + if (getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + FlatForGlobal = false; + } + // Set defaults if needed. if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; @@ -182,7 +195,7 @@ AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), TargetTriple(TT), - Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS), + Gen(INVALID), InstrItins(getInstrItineraryForCPU(GPU)), LDSBankCount(0), MaxPrivateElementSize(0), Index: llvm/test/CodeGen/AMDGPU/lower-kernargs.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-kernargs.ll +++ llvm/test/CodeGen/AMDGPU/lower-kernargs.ll @@ -533,10 +533,7 @@ define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { ; HSA-LABEL: @kern_lds_ptr_si( ; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0 -; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* -; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 -; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 +; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr_si( Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll @@ -1,5 +1,5 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,8 +8,8 @@ define amdgpu_kernel void @global_agent_unordered_load( ; GFX6-LABEL: global_agent_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX6-NEXT: s_mov_b32 s0, s4 @@ -82,8 +82,8 @@ define amdgpu_kernel void @global_agent_monotonic_load( ; GFX6-LABEL: global_agent_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -156,8 +156,8 @@ define amdgpu_kernel void @global_agent_acquire_load( ; GFX6-LABEL: global_agent_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -236,8 +236,8 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX6-LABEL: global_agent_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -321,12 +321,12 @@ define amdgpu_kernel void @global_agent_unordered_store( ; GFX6-LABEL: global_agent_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -382,12 +382,12 @@ define amdgpu_kernel void @global_agent_monotonic_store( ; GFX6-LABEL: global_agent_monotonic_store: ; GFX6: ; 
%bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -443,12 +443,12 @@ define amdgpu_kernel void @global_agent_release_store( ; GFX6-LABEL: global_agent_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -511,12 +511,12 @@ define amdgpu_kernel void @global_agent_seq_cst_store( ; GFX6-LABEL: global_agent_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -579,13 +579,13 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw( ; GFX6-LABEL: global_agent_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; 
GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_monotonic_atomicrmw: @@ -640,13 +640,13 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw( ; GFX6-LABEL: global_agent_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -714,14 +714,14 @@ define amdgpu_kernel void @global_agent_release_atomicrmw( ; GFX6-LABEL: global_agent_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; 
GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_release_atomicrmw: @@ -782,14 +782,14 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw( ; GFX6-LABEL: global_agent_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -863,14 +863,14 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw( ; GFX6-LABEL: global_agent_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -944,16 +944,16 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw( ; GFX6-LABEL: global_agent_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 
0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_acquire_ret_atomicrmw: @@ -1022,17 +1022,17 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_agent_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_acq_rel_ret_atomicrmw: @@ -1107,17 +1107,17 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_agent_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 
s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_seq_cst_ret_atomicrmw: @@ -1192,14 +1192,14 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_monotonic_monotonic_cmpxchg: @@ -1261,14 +1261,14 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], 
s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1343,15 +1343,15 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_release_monotonic_cmpxchg: @@ -1419,15 +1419,15 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; 
GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1508,15 +1508,15 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1597,14 +1597,14 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_agent_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; 
GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1679,15 +1679,15 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg( ; GFX6-LABEL: global_agent_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1768,15 +1768,15 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_agent_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap 
v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1857,15 +1857,15 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_agent_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1946,15 +1946,15 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_agent_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; 
GFX6-NEXT: s_endpgm @@ -2035,17 +2035,17 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_acquire_monotonic_ret_cmpxchg: @@ -2124,18 +2124,18 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg: @@ -2220,18 +2220,18 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg: @@ -2316,17 +2316,17 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: 
buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_acquire_acquire_ret_cmpxchg: @@ -2405,18 +2405,18 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_release_acquire_ret_cmpxchg: @@ -2501,18 +2501,18 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 
s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg: @@ -2597,18 +2597,18 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg: @@ -2693,18 +2693,18 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; 
%entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg: @@ -2789,8 +2789,8 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load( ; GFX6-LABEL: global_agent_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2863,8 +2863,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load( ; GFX6-LABEL: global_agent_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2937,8 +2937,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load( ; GFX6-LABEL: global_agent_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 
s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -3017,8 +3017,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX6-LABEL: global_agent_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -3102,12 +3102,12 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store( ; GFX6-LABEL: global_agent_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3163,12 +3163,12 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store( ; GFX6-LABEL: global_agent_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3224,12 +3224,12 @@ define amdgpu_kernel void 
@global_agent_one_as_release_store( ; GFX6-LABEL: global_agent_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -3292,12 +3292,12 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store( ; GFX6-LABEL: global_agent_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -3360,13 +3360,13 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw( ; GFX6-LABEL: global_agent_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; 
GFX7-LABEL: global_agent_one_as_monotonic_atomicrmw: @@ -3421,13 +3421,13 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw( ; GFX6-LABEL: global_agent_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -3493,14 +3493,14 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw( ; GFX6-LABEL: global_agent_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_release_atomicrmw: @@ -3561,14 +3561,14 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: global_agent_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -3640,14 +3640,14 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: global_agent_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -3719,16 +3719,16 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: global_agent_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) 
; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_acquire_ret_atomicrmw: @@ -3797,17 +3797,17 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw: @@ -3882,17 +3882,17 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: 
buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw: @@ -3967,14 +3967,14 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg: @@ -4036,14 +4036,14 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: 
buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4116,15 +4116,15 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_release_monotonic_cmpxchg: @@ -4192,15 +4192,15 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4279,15 +4279,15 @@ define amdgpu_kernel void 
@global_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4366,14 +4366,14 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4446,15 +4446,15 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: global_agent_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4533,15 +4533,15 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4620,15 +4620,15 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4707,15 +4707,15 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4794,17 +4794,17 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 
s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg: @@ -4883,18 +4883,18 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -4979,18 +4979,18 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -5075,17 +5075,17 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: 
global_agent_one_as_acquire_acquire_ret_cmpxchg: @@ -5164,18 +5164,18 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg: @@ -5260,18 +5260,18 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: 
buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg: @@ -5356,18 +5356,18 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg: @@ -5452,18 +5452,18 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,8 +8,8 @@ define amdgpu_kernel void @global_nontemporal_load_0( ; GFX6-LABEL: global_nontemporal_load_0: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s7, 0x100f000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 @@ -77,8 +77,8 @@ define amdgpu_kernel void 
@global_nontemporal_load_1( ; GFX6-LABEL: global_nontemporal_load_1: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -157,8 +157,8 @@ define amdgpu_kernel void @global_nontemporal_store_0( ; GFX6-LABEL: global_nontemporal_store_0: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s7, 0x100f000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 @@ -226,8 +226,8 @@ define amdgpu_kernel void @global_nontemporal_store_1( ; GFX6-LABEL: global_nontemporal_store_1: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s7, 0x100f000 ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s 
| FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,8 +8,8 @@ define amdgpu_kernel void @global_singlethread_unordered_load( ; GFX6-LABEL: global_singlethread_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -82,8 +82,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_load( ; GFX6-LABEL: global_singlethread_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -156,8 +156,8 @@ define amdgpu_kernel void @global_singlethread_acquire_load( ; GFX6-LABEL: global_singlethread_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -230,8 +230,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load( ; GFX6-LABEL: global_singlethread_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -304,12 +304,12 @@ define amdgpu_kernel void @global_singlethread_unordered_store( ; GFX6-LABEL: global_singlethread_unordered_store: ; 
GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -365,12 +365,12 @@ define amdgpu_kernel void @global_singlethread_monotonic_store( ; GFX6-LABEL: global_singlethread_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -426,12 +426,12 @@ define amdgpu_kernel void @global_singlethread_release_store( ; GFX6-LABEL: global_singlethread_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -487,12 +487,12 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store( ; GFX6-LABEL: global_singlethread_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; 
GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -548,13 +548,13 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw( ; GFX6-LABEL: global_singlethread_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_monotonic_atomicrmw: @@ -609,13 +609,13 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw( ; GFX6-LABEL: global_singlethread_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acquire_atomicrmw: @@ -670,13 +670,13 @@ define amdgpu_kernel void 
@global_singlethread_release_atomicrmw( ; GFX6-LABEL: global_singlethread_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_release_atomicrmw: @@ -731,13 +731,13 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw( ; GFX6-LABEL: global_singlethread_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acq_rel_atomicrmw: @@ -792,13 +792,13 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw( ; GFX6-LABEL: global_singlethread_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_atomicrmw: @@ -853,15 +853,15 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw( ; GFX6-LABEL: global_singlethread_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acquire_ret_atomicrmw: @@ -925,15 +925,15 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_singlethread_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; 
GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acq_rel_ret_atomicrmw: @@ -997,15 +997,15 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_singlethread_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_ret_atomicrmw: @@ -1069,14 +1069,14 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_monotonic_monotonic_cmpxchg: @@ -1138,14 +1138,14 @@ 
define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acquire_monotonic_cmpxchg: @@ -1207,14 +1207,14 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_release_monotonic_cmpxchg: @@ -1276,14 +1276,14 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; 
GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg: @@ -1345,14 +1345,14 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg: @@ -1414,14 +1414,14 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acquire_acquire_cmpxchg: @@ -1483,14 +1483,14 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_release_acquire_cmpxchg: @@ -1552,14 +1552,14 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acq_rel_acquire_cmpxchg: @@ -1621,14 +1621,14 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_acquire_cmpxchg: @@ -1690,14 +1690,14 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 
+; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg: @@ -1759,16 +1759,16 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg: @@ -1842,16 +1842,16 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; 
GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg: @@ -1925,16 +1925,16 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg: @@ -2008,16 +2008,16 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; 
GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg: @@ -2091,16 +2091,16 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_release_acquire_ret_cmpxchg: @@ -2174,16 +2174,16 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: 
s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg: @@ -2257,16 +2257,16 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg: @@ -2340,16 +2340,16 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: 
s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg: @@ -2423,8 +2423,8 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load( ; GFX6-LABEL: global_singlethread_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2497,8 +2497,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load( ; GFX6-LABEL: global_singlethread_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2571,8 +2571,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load( ; GFX6-LABEL: global_singlethread_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2645,8 +2645,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2719,12 +2719,12 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store( ; GFX6-LABEL: global_singlethread_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2780,12 +2780,12 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store( ; GFX6-LABEL: global_singlethread_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2841,12 +2841,12 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store( ; GFX6-LABEL: global_singlethread_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2902,12 +2902,12 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2963,13 +2963,13 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_monotonic_atomicrmw: @@ -3024,13 +3024,13 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw( ; GFX6-LABEL: 
global_singlethread_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acquire_atomicrmw: @@ -3085,13 +3085,13 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_release_atomicrmw: @@ -3146,13 +3146,13 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acq_rel_atomicrmw: @@ -3207,13 +3207,13 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_atomicrmw: @@ -3268,15 +3268,15 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: 
global_singlethread_one_as_acquire_ret_atomicrmw: @@ -3340,15 +3340,15 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw: @@ -3412,15 +3412,15 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw: @@ -3484,14 +3484,14 @@ define amdgpu_kernel void 
@global_singlethread_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg: @@ -3553,14 +3553,14 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg: @@ -3622,14 +3622,14 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: ; 
GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg: @@ -3691,14 +3691,14 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg: @@ -3760,14 +3760,14 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; 
GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg: @@ -3829,14 +3829,14 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg: @@ -3898,14 +3898,14 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 
s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_release_acquire_cmpxchg: @@ -3967,14 +3967,14 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg: @@ -4036,14 +4036,14 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg: @@ -4105,14 +4105,14 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg: @@ -4174,16 +4174,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; 
GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg: @@ -4257,16 +4257,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -4340,16 +4340,16 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 
; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -4423,16 +4423,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg: @@ -4506,16 +4506,16 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, 
-1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg: @@ -4589,16 +4589,16 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: @@ -4672,16 +4672,16 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; 
%entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: @@ -4755,16 +4755,16 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: Index: 
llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,8 +8,8 @@ define amdgpu_kernel void @global_system_unordered_load( ; GFX6-LABEL: global_system_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -82,8 +82,8 @@ define amdgpu_kernel void @global_system_monotonic_load( ; GFX6-LABEL: global_system_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -156,8 +156,8 @@ define amdgpu_kernel void @global_system_acquire_load( ; GFX6-LABEL: global_system_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; 
GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -236,8 +236,8 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX6-LABEL: global_system_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -321,12 +321,12 @@ define amdgpu_kernel void @global_system_unordered_store( ; GFX6-LABEL: global_system_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -382,12 +382,12 @@ define amdgpu_kernel void @global_system_monotonic_store( ; GFX6-LABEL: global_system_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -443,12 +443,12 @@ define amdgpu_kernel void @global_system_release_store( ; GFX6-LABEL: global_system_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, 
s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -511,12 +511,12 @@ define amdgpu_kernel void @global_system_seq_cst_store( ; GFX6-LABEL: global_system_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -579,13 +579,13 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw( ; GFX6-LABEL: global_system_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_monotonic_atomicrmw: @@ -640,13 +640,13 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw( ; GFX6-LABEL: 
global_system_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -714,14 +714,14 @@ define amdgpu_kernel void @global_system_release_atomicrmw( ; GFX6-LABEL: global_system_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_release_atomicrmw: @@ -782,14 +782,14 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw( ; GFX6-LABEL: global_system_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -863,14 +863,14 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw( ; GFX6-LABEL: global_system_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -944,16 +944,16 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw( ; GFX6-LABEL: global_system_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: 
s_endpgm ; ; GFX7-LABEL: global_system_acquire_ret_atomicrmw: @@ -1022,17 +1022,17 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_system_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_acq_rel_ret_atomicrmw: @@ -1107,17 +1107,17 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_system_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; 
GFX7-LABEL: global_system_seq_cst_ret_atomicrmw: @@ -1192,14 +1192,14 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_system_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_monotonic_monotonic_cmpxchg: @@ -1261,14 +1261,14 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_system_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1343,15 +1343,15 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg( ; GFX6-LABEL: 
global_system_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_release_monotonic_cmpxchg: @@ -1419,15 +1419,15 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_system_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1508,15 +1508,15 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_system_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], 
s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1597,14 +1597,14 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_system_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1679,15 +1679,15 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg( ; GFX6-LABEL: global_system_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1768,15 +1768,15 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_system_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1857,15 +1857,15 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_system_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: 
s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -1946,15 +1946,15 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_system_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -2035,17 +2035,17 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_system_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_acquire_monotonic_ret_cmpxchg: @@ -2124,18 +2124,18 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg: @@ -2220,18 +2220,18 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg: @@ -2316,17 +2316,17 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_acquire_acquire_ret_cmpxchg: @@ -2405,18 +2405,18 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; 
GFX6-LABEL: global_system_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_release_acquire_ret_cmpxchg: @@ -2501,18 +2501,18 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_acq_rel_acquire_ret_cmpxchg: @@ -2597,18 +2597,18 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_seq_cst_acquire_ret_cmpxchg: @@ -2693,18 +2693,18 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg: @@ -2789,8 +2789,8 @@ define amdgpu_kernel void @global_system_one_as_unordered_load( ; GFX6-LABEL: global_system_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2863,8 +2863,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load( ; GFX6-LABEL: global_system_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2937,8 +2937,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_load( ; GFX6-LABEL: global_system_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -3017,8 +3017,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX6-LABEL: global_system_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 
0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -3102,12 +3102,12 @@ define amdgpu_kernel void @global_system_one_as_unordered_store( ; GFX6-LABEL: global_system_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3163,12 +3163,12 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store( ; GFX6-LABEL: global_system_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3224,12 +3224,12 @@ define amdgpu_kernel void @global_system_one_as_release_store( ; GFX6-LABEL: global_system_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; 
GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -3292,12 +3292,12 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store( ; GFX6-LABEL: global_system_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -3360,13 +3360,13 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw( ; GFX6-LABEL: global_system_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_monotonic_atomicrmw: @@ -3421,13 +3421,13 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw( ; GFX6-LABEL: global_system_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, 
-1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -3493,14 +3493,14 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw( ; GFX6-LABEL: global_system_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_release_atomicrmw: @@ -3561,14 +3561,14 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: global_system_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -3640,14 +3640,14 @@ define amdgpu_kernel void 
@global_system_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: global_system_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -3719,16 +3719,16 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: global_system_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_acquire_ret_atomicrmw: @@ -3797,17 +3797,17 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; 
GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_acq_rel_ret_atomicrmw: @@ -3882,17 +3882,17 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_seq_cst_ret_atomicrmw: @@ -3967,14 +3967,14 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 
-; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg: @@ -4036,14 +4036,14 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_system_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4116,15 +4116,15 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: global_system_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; 
GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_release_monotonic_cmpxchg: @@ -4192,15 +4192,15 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4279,15 +4279,15 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4366,14 +4366,14 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_system_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4446,15 +4446,15 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: global_system_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: 
s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4533,15 +4533,15 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4620,15 +4620,15 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: 
buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4707,15 +4707,15 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 ; GFX6-NEXT: s_endpgm @@ -4794,17 +4794,17 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg: @@ -4883,18 +4883,18 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -4979,18 +4979,18 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: 
v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -5075,17 +5075,17 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg: @@ -5164,18 +5164,18 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 
+; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_release_acquire_ret_cmpxchg: @@ -5260,18 +5260,18 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg: @@ -5356,18 +5356,18 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; 
GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg: @@ -5452,18 +5452,18 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; 
GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg: Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,8 +8,8 @@ define amdgpu_kernel void @global_wavefront_unordered_load( ; GFX6-LABEL: global_wavefront_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -82,8 +82,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_load( ; GFX6-LABEL: global_wavefront_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -156,8 +156,8 @@ define amdgpu_kernel void @global_wavefront_acquire_load( ; GFX6-LABEL: global_wavefront_acquire_load: ; GFX6: ; %bb.0: ; 
%entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -230,8 +230,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load( ; GFX6-LABEL: global_wavefront_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -304,12 +304,12 @@ define amdgpu_kernel void @global_wavefront_unordered_store( ; GFX6-LABEL: global_wavefront_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -365,12 +365,12 @@ define amdgpu_kernel void @global_wavefront_monotonic_store( ; GFX6-LABEL: global_wavefront_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -426,12 +426,12 @@ define amdgpu_kernel void 
@global_wavefront_release_store( ; GFX6-LABEL: global_wavefront_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -487,12 +487,12 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store( ; GFX6-LABEL: global_wavefront_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -548,13 +548,13 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw( ; GFX6-LABEL: global_wavefront_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_monotonic_atomicrmw: @@ -609,13 +609,13 @@ define 
amdgpu_kernel void @global_wavefront_acquire_atomicrmw( ; GFX6-LABEL: global_wavefront_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acquire_atomicrmw: @@ -670,13 +670,13 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw( ; GFX6-LABEL: global_wavefront_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_release_atomicrmw: @@ -731,13 +731,13 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw( ; GFX6-LABEL: global_wavefront_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acq_rel_atomicrmw: @@ -792,13 +792,13 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw( ; GFX6-LABEL: global_wavefront_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_atomicrmw: @@ -853,15 +853,15 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw( ; GFX6-LABEL: global_wavefront_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acquire_ret_atomicrmw: @@ -925,15 +925,15 @@ define amdgpu_kernel void 
@global_wavefront_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_wavefront_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acq_rel_ret_atomicrmw: @@ -997,15 +997,15 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_wavefront_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_ret_atomicrmw: @@ -1069,14 +1069,14 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 
s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_monotonic_monotonic_cmpxchg: @@ -1138,14 +1138,14 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acquire_monotonic_cmpxchg: @@ -1207,14 +1207,14 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; 
GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_release_monotonic_cmpxchg: @@ -1276,14 +1276,14 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg: @@ -1345,14 +1345,14 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; 
GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg: @@ -1414,14 +1414,14 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acquire_acquire_cmpxchg: @@ -1483,14 +1483,14 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: 
buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_release_acquire_cmpxchg: @@ -1552,14 +1552,14 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acq_rel_acquire_cmpxchg: @@ -1621,14 +1621,14 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_acquire_cmpxchg: @@ -1690,14 +1690,14 @@ define amdgpu_kernel void 
@global_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg: @@ -1759,16 +1759,16 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg: @@ -1842,16 +1842,16 @@ define amdgpu_kernel void 
@global_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg: @@ -1925,16 +1925,16 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: 
s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg: @@ -2008,16 +2008,16 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg: @@ -2091,16 +2091,16 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; 
GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_release_acquire_ret_cmpxchg: @@ -2174,16 +2174,16 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg: @@ -2257,16 +2257,16 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 
+; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg: @@ -2340,16 +2340,16 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg: @@ -2423,8 +2423,8 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load( ; GFX6-LABEL: global_wavefront_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2497,8 +2497,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load( ; GFX6-LABEL: global_wavefront_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 
-; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2571,8 +2571,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load( ; GFX6-LABEL: global_wavefront_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2645,8 +2645,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2719,12 +2719,12 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store( ; GFX6-LABEL: global_wavefront_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2780,12 +2780,12 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store( ; GFX6-LABEL: global_wavefront_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: 
s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2841,12 +2841,12 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store( ; GFX6-LABEL: global_wavefront_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2902,12 +2902,12 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -2963,13 +2963,13 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_monotonic_atomicrmw: @@ -3024,13 +3024,13 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acquire_atomicrmw: @@ -3085,13 +3085,13 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_release_atomicrmw: @@ 
-3146,13 +3146,13 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acq_rel_atomicrmw: @@ -3207,13 +3207,13 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_atomicrmw: @@ -3268,15 +3268,15 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 
0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw: @@ -3340,15 +3340,15 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw: @@ -3412,15 +3412,15 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw: @@ -3484,14 +3484,14 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg: @@ -3553,14 +3553,14 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap 
v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg: @@ -3622,14 +3622,14 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg: @@ -3691,14 +3691,14 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], 
off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg: @@ -3760,14 +3760,14 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg: @@ -3829,14 +3829,14 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg: @@ -3898,14 +3898,14 @@ define 
amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_release_acquire_cmpxchg: @@ -3967,14 +3967,14 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg: @@ -4036,14 +4036,14 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; 
GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg: @@ -4105,14 +4105,14 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg: @@ -4174,16 +4174,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: 
s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg: @@ -4257,16 +4257,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -4340,16 +4340,16 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; 
%bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -4423,16 +4423,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg: @@ -4506,16 +4506,16 @@ 
define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg: @@ -4589,16 +4589,16 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: @@ -4672,16 +4672,16 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: @@ -4755,16 +4755,16 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: 
buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,8 +8,8 @@ define amdgpu_kernel void @global_workgroup_unordered_load( ; GFX6-LABEL: global_workgroup_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -82,8 +82,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_load( ; GFX6-LABEL: global_workgroup_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: 
s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -156,8 +156,8 @@ define amdgpu_kernel void @global_workgroup_acquire_load( ; GFX6-LABEL: global_workgroup_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -231,8 +231,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX6-LABEL: global_workgroup_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -310,12 +310,12 @@ define amdgpu_kernel void @global_workgroup_unordered_store( ; GFX6-LABEL: global_workgroup_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -371,12 +371,12 @@ define amdgpu_kernel void @global_workgroup_monotonic_store( ; GFX6-LABEL: global_workgroup_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt 
lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -432,12 +432,12 @@ define amdgpu_kernel void @global_workgroup_release_store( ; GFX6-LABEL: global_workgroup_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -499,12 +499,12 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store( ; GFX6-LABEL: global_workgroup_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -566,13 +566,13 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw( ; GFX6-LABEL: global_workgroup_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_monotonic_atomicrmw: @@ -627,13 +627,13 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw( ; GFX6-LABEL: global_workgroup_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -695,14 +695,14 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw( ; GFX6-LABEL: global_workgroup_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_release_atomicrmw: @@ -762,14 +762,14 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw( ; GFX6-LABEL: global_workgroup_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; 
GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -836,14 +836,14 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw( ; GFX6-LABEL: global_workgroup_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -910,15 +910,15 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw( ; GFX6-LABEL: global_workgroup_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; 
GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_acquire_ret_atomicrmw: @@ -983,16 +983,16 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_workgroup_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_acq_rel_ret_atomicrmw: @@ -1062,16 +1062,16 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_workgroup_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_seq_cst_ret_atomicrmw: @@ -1141,14 +1141,14 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_monotonic_monotonic_cmpxchg: @@ -1210,14 +1210,14 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm 
; @@ -1286,15 +1286,15 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_release_monotonic_cmpxchg: @@ -1361,15 +1361,15 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1443,15 +1443,15 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: 
global_workgroup_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1525,14 +1525,14 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1601,15 +1601,15 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1683,15 +1683,15 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1765,15 +1765,15 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: 
s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1847,15 +1847,15 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm ; @@ -1929,16 +1929,16 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], 
off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg: @@ -2014,17 +2014,17 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg: @@ -2105,17 +2105,17 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; 
GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg: @@ -2196,16 +2196,16 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg: @@ -2281,17 +2281,17 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_release_acquire_ret_cmpxchg: @@ -2372,17 +2372,17 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg: @@ -2463,17 +2463,17 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: 
global_workgroup_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg: @@ -2554,17 +2554,17 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, 
s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg: @@ -2645,8 +2645,8 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load( ; GFX6-LABEL: global_workgroup_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2719,8 +2719,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load( ; GFX6-LABEL: global_workgroup_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2793,8 +2793,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load( ; GFX6-LABEL: global_workgroup_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2868,8 +2868,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_mov_b32 s0, s4 @@ -2944,12 +2944,12 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store( ; GFX6-LABEL: global_workgroup_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; 
GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3005,12 +3005,12 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store( ; GFX6-LABEL: global_workgroup_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3066,12 +3066,12 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store( ; GFX6-LABEL: global_workgroup_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3129,12 +3129,12 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 
-; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; @@ -3192,13 +3192,13 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_monotonic_atomicrmw: @@ -3253,13 +3253,13 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acquire_atomicrmw: @@ -3316,13 +3316,13 @@ define amdgpu_kernel void 
@global_workgroup_one_as_release_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_release_atomicrmw: @@ -3379,13 +3379,13 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acq_rel_atomicrmw: @@ -3444,13 +3444,13 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: 
s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_atomicrmw: @@ -3509,15 +3509,15 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw: @@ -3582,15 +3582,15 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) 
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw: @@ -3657,15 +3657,15 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s4, s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_atomic_swap v0, off, s[4:7], 0 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: buffer_atomic_swap v0, off, s[0:3], 0 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw: @@ -3732,14 +3732,14 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; 
GFX7-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg: @@ -3801,14 +3801,14 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg: @@ -3872,14 +3872,14 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg: @@ -3943,14 +3943,14 @@ define amdgpu_kernel void 
@global_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg: @@ -4016,14 +4016,14 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg: @@ -4089,14 +4089,14 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; 
GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg: @@ -4160,14 +4160,14 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_release_acquire_cmpxchg: @@ -4233,14 +4233,14 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 
+; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg: @@ -4306,14 +4306,14 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg: @@ -4379,14 +4379,14 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 
s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg: @@ -4452,16 +4452,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg: @@ -4536,16 +4536,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; 
GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: @@ -4622,16 +4622,16 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: @@ -4708,16 +4708,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; 
GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg: @@ -4792,16 +4792,16 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg: @@ -4878,16 +4878,16 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; 
%bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: @@ -4964,16 +4964,16 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: @@ -5050,16 +5050,16 @@ define 
amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x2 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc +; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s5 +; GFX6-NEXT: buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,13 
+8,13 @@ define amdgpu_kernel void @local_agent_unordered_load( ; GFX6-LABEL: local_agent_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -74,13 +74,13 @@ define amdgpu_kernel void @local_agent_monotonic_load( ; GFX6-LABEL: local_agent_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -140,15 +140,15 @@ define amdgpu_kernel void @local_agent_acquire_load( ; GFX6-LABEL: local_agent_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm ; @@ -212,16 +212,16 @@ define amdgpu_kernel void 
@local_agent_seq_cst_load( ; GFX6-LABEL: local_agent_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm ; @@ -291,12 +291,12 @@ define amdgpu_kernel void @local_agent_unordered_store( ; GFX6-LABEL: local_agent_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -346,12 +346,12 @@ define amdgpu_kernel void @local_agent_monotonic_store( ; GFX6-LABEL: local_agent_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -401,12 +401,12 @@ define amdgpu_kernel void @local_agent_release_store( ; GFX6-LABEL: local_agent_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, 
s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm @@ -463,12 +463,12 @@ define amdgpu_kernel void @local_agent_seq_cst_store( ; GFX6-LABEL: local_agent_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm @@ -525,12 +525,12 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw( ; GFX6-LABEL: local_agent_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -580,12 +580,12 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw( ; GFX6-LABEL: local_agent_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: 
s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -648,12 +648,12 @@ define amdgpu_kernel void @local_agent_release_atomicrmw( ; GFX6-LABEL: local_agent_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm @@ -710,12 +710,12 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw( ; GFX6-LABEL: local_agent_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -785,12 +785,12 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw( ; GFX6-LABEL: local_agent_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: 
v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -860,12 +860,12 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw( ; GFX6-LABEL: local_agent_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -934,12 +934,12 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_agent_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -1015,12 +1015,12 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_agent_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; 
GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -1096,8 +1096,8 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1158,8 +1158,8 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1233,8 +1233,8 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1302,8 +1302,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1384,8 +1384,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1466,8 +1466,8 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_agent_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1541,8 +1541,8 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg( ; GFX6-LABEL: local_agent_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1623,8 +1623,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_agent_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1705,8 +1705,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_agent_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1787,8 +1787,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_agent_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1869,8 +1869,8 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_agent_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1949,8 +1949,8 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2036,8 +2036,8 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: 
s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2123,8 +2123,8 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2203,8 +2203,8 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2290,8 +2290,8 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2377,8 +2377,8 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2464,8 +2464,8 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg( ; 
GFX6-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2551,13 +2551,13 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load( ; GFX6-LABEL: local_agent_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2617,13 +2617,13 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load( ; GFX6-LABEL: local_agent_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2683,13 +2683,13 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load( ; GFX6-LABEL: local_agent_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, 
s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2749,13 +2749,13 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load( ; GFX6-LABEL: local_agent_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2815,12 +2815,12 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store( ; GFX6-LABEL: local_agent_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2870,12 +2870,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store( ; GFX6-LABEL: local_agent_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: 
v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2925,12 +2925,12 @@ define amdgpu_kernel void @local_agent_one_as_release_store( ; GFX6-LABEL: local_agent_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2980,12 +2980,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store( ; GFX6-LABEL: local_agent_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3035,12 +3035,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw( ; GFX6-LABEL: local_agent_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3090,12 
+3090,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw( ; GFX6-LABEL: local_agent_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3145,12 +3145,12 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw( ; GFX6-LABEL: local_agent_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3200,12 +3200,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: local_agent_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3255,12 +3255,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: local_agent_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3310,12 +3310,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: local_agent_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3376,12 +3376,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3442,12 +3442,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, 
s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3508,8 +3508,8 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3570,8 +3570,8 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3632,8 +3632,8 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3694,8 +3694,8 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3756,8 +3756,8 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3818,8 +3818,8 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3880,8 +3880,8 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: local_agent_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3942,8 +3942,8 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4004,8 +4004,8 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4066,8 +4066,8 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4128,8 +4128,8 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4202,8 +4202,8 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4276,8 +4276,8 @@ define 
amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4350,8 +4350,8 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4424,8 +4424,8 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4498,8 +4498,8 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4572,8 +4572,8 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: 
local_agent_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4646,8 +4646,8 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,13 +8,13 @@ define amdgpu_kernel void @local_nontemporal_load_0( ; GFX6-LABEL: local_nontemporal_load_0: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0xb +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 ; GFX6-NEXT: s_mov_b32 m0, -1 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -83,13 +83,13 @@ define amdgpu_kernel void @local_nontemporal_load_1( ; GFX6-LABEL: local_nontemporal_load_1: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX6-NEXT: s_mov_b32 m0, -1 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, s6, v0 ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -163,14 +163,14 @@ define amdgpu_kernel void @local_nontemporal_store_0( ; GFX6-LABEL: local_nontemporal_store_0: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: v_mov_b32_e32 v0, s2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -235,15 +235,15 @@ define amdgpu_kernel void @local_nontemporal_store_1( ; GFX6-LABEL: local_nontemporal_store_1: ; GFX6: ; %bb.0: ; 
%entry -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,13 +8,13 @@ define amdgpu_kernel void @local_singlethread_unordered_load( ; GFX6-LABEL: local_singlethread_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; 
GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -74,13 +74,13 @@ define amdgpu_kernel void @local_singlethread_monotonic_load( ; GFX6-LABEL: local_singlethread_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -140,13 +140,13 @@ define amdgpu_kernel void @local_singlethread_acquire_load( ; GFX6-LABEL: local_singlethread_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -206,13 +206,13 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load( ; GFX6-LABEL: local_singlethread_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; 
GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -272,12 +272,12 @@ define amdgpu_kernel void @local_singlethread_unordered_store( ; GFX6-LABEL: local_singlethread_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -327,12 +327,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_store( ; GFX6-LABEL: local_singlethread_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -382,12 +382,12 @@ define amdgpu_kernel void @local_singlethread_release_store( ; GFX6-LABEL: local_singlethread_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm 
; @@ -437,12 +437,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store( ; GFX6-LABEL: local_singlethread_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -492,12 +492,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw( ; GFX6-LABEL: local_singlethread_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -547,12 +547,12 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw( ; GFX6-LABEL: local_singlethread_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -602,12 +602,12 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw( ; GFX6-LABEL: local_singlethread_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -657,12 +657,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw( ; GFX6-LABEL: local_singlethread_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -712,12 +712,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw( ; GFX6-LABEL: local_singlethread_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -767,12 +767,12 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw( ; GFX6-LABEL: local_singlethread_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: 
s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -833,12 +833,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_singlethread_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -899,12 +899,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_singlethread_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -965,8 +965,8 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: 
s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1027,8 +1027,8 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1089,8 +1089,8 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1151,8 +1151,8 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1213,8 +1213,8 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1275,8 +1275,8 @@ define amdgpu_kernel void 
@local_singlethread_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1337,8 +1337,8 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1399,8 +1399,8 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1461,8 +1461,8 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1523,8 +1523,8 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, 
s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1585,8 +1585,8 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1659,8 +1659,8 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1733,8 +1733,8 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1807,8 +1807,8 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1881,8 +1881,8 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1955,8 +1955,8 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2029,8 +2029,8 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2103,8 +2103,8 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, 
s2 @@ -2177,13 +2177,13 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load( ; GFX6-LABEL: local_singlethread_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2243,13 +2243,13 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load( ; GFX6-LABEL: local_singlethread_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2309,13 +2309,13 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load( ; GFX6-LABEL: local_singlethread_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; 
GFX6-NEXT: s_endpgm @@ -2375,13 +2375,13 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2441,12 +2441,12 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store( ; GFX6-LABEL: local_singlethread_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2496,12 +2496,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store( ; GFX6-LABEL: local_singlethread_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2551,12 +2551,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store( 
; GFX6-LABEL: local_singlethread_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2606,12 +2606,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2661,12 +2661,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2716,12 +2716,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword 
s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2771,12 +2771,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2826,12 +2826,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2881,12 +2881,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, 
-1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2936,12 +2936,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3002,12 +3002,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3068,12 +3068,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3134,8 +3134,8 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3196,8 +3196,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3258,8 +3258,8 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3320,8 +3320,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; 
GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3382,8 +3382,8 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3444,8 +3444,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3506,8 +3506,8 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3568,8 +3568,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: 
s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3630,8 +3630,8 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3692,8 +3692,8 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3754,8 +3754,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3828,8 +3828,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, 
s2 @@ -3902,8 +3902,8 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3976,8 +3976,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4050,8 +4050,8 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4124,8 +4124,8 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4198,8 +4198,8 @@ define amdgpu_kernel void 
@local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4272,8 +4272,8 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,13 +8,13 @@ define amdgpu_kernel void @local_system_unordered_load( ; GFX6-LABEL: local_system_unordered_load: ; GFX6: ; %bb.0: ; 
%entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -74,13 +74,13 @@ define amdgpu_kernel void @local_system_monotonic_load( ; GFX6-LABEL: local_system_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -140,15 +140,15 @@ define amdgpu_kernel void @local_system_acquire_load( ; GFX6-LABEL: local_system_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm ; @@ -212,16 +212,16 @@ define amdgpu_kernel void @local_system_seq_cst_load( ; GFX6-LABEL: local_system_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 
0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm ; @@ -291,12 +291,12 @@ define amdgpu_kernel void @local_system_unordered_store( ; GFX6-LABEL: local_system_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -346,12 +346,12 @@ define amdgpu_kernel void @local_system_monotonic_store( ; GFX6-LABEL: local_system_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -401,12 +401,12 @@ define amdgpu_kernel void @local_system_release_store( ; GFX6-LABEL: local_system_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, 
s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm @@ -463,12 +463,12 @@ define amdgpu_kernel void @local_system_seq_cst_store( ; GFX6-LABEL: local_system_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm @@ -525,12 +525,12 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw( ; GFX6-LABEL: local_system_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -580,12 +580,12 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw( ; GFX6-LABEL: local_system_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; 
GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -648,12 +648,12 @@ define amdgpu_kernel void @local_system_release_atomicrmw( ; GFX6-LABEL: local_system_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm @@ -710,12 +710,12 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw( ; GFX6-LABEL: local_system_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -785,12 +785,12 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw( ; GFX6-LABEL: local_system_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, 
s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -860,12 +860,12 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw( ; GFX6-LABEL: local_system_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: buffer_wbinvl1 @@ -934,12 +934,12 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_system_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -1015,12 +1015,12 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_system_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -1096,8 +1096,8 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_system_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1158,8 +1158,8 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_system_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1233,8 +1233,8 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg( ; GFX6-LABEL: local_system_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1302,8 +1302,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_system_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1384,8 +1384,8 @@ define amdgpu_kernel void 
@local_system_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_system_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1466,8 +1466,8 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_system_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1541,8 +1541,8 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg( ; GFX6-LABEL: local_system_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1623,8 +1623,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_system_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1705,8 +1705,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_system_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 
0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1787,8 +1787,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_system_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1869,8 +1869,8 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_system_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1949,8 +1949,8 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2036,8 +2036,8 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: 
v_mov_b32_e32 v0, s2 @@ -2123,8 +2123,8 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2203,8 +2203,8 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2290,8 +2290,8 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2377,8 +2377,8 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2464,8 +2464,8 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; 
%bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2551,13 +2551,13 @@ define amdgpu_kernel void @local_system_one_as_unordered_load( ; GFX6-LABEL: local_system_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2617,13 +2617,13 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load( ; GFX6-LABEL: local_system_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2683,13 +2683,13 @@ define amdgpu_kernel void @local_system_one_as_acquire_load( ; GFX6-LABEL: local_system_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2749,13 +2749,13 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load( ; GFX6-LABEL: local_system_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2815,12 +2815,12 @@ define amdgpu_kernel void @local_system_one_as_unordered_store( ; GFX6-LABEL: local_system_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2870,12 +2870,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store( ; GFX6-LABEL: local_system_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 
v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2925,12 +2925,12 @@ define amdgpu_kernel void @local_system_one_as_release_store( ; GFX6-LABEL: local_system_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2980,12 +2980,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store( ; GFX6-LABEL: local_system_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3035,12 +3035,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw( ; GFX6-LABEL: local_system_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3090,12 +3090,12 @@ define amdgpu_kernel void 
@local_system_one_as_acquire_atomicrmw( ; GFX6-LABEL: local_system_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3145,12 +3145,12 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw( ; GFX6-LABEL: local_system_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3200,12 +3200,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: local_system_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3255,12 +3255,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: local_system_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; 
GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3310,12 +3310,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: local_system_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3376,12 +3376,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_system_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3442,12 +3442,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_system_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: 
s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3508,8 +3508,8 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3570,8 +3570,8 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_system_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3632,8 +3632,8 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: local_system_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3694,8 +3694,8 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, 
s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3756,8 +3756,8 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3818,8 +3818,8 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_system_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3880,8 +3880,8 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: local_system_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3942,8 +3942,8 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 
0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4004,8 +4004,8 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4066,8 +4066,8 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4128,8 +4128,8 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4202,8 +4202,8 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4276,8 +4276,8 @@ define 
amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4350,8 +4350,8 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4424,8 +4424,8 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4498,8 +4498,8 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4572,8 +4572,8 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: 
local_system_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4646,8 +4646,8 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,13 +8,13 @@ define amdgpu_kernel void @local_wavefront_unordered_load( ; GFX6-LABEL: local_wavefront_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: 
s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -74,13 +74,13 @@ define amdgpu_kernel void @local_wavefront_monotonic_load( ; GFX6-LABEL: local_wavefront_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -140,13 +140,13 @@ define amdgpu_kernel void @local_wavefront_acquire_load( ; GFX6-LABEL: local_wavefront_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -206,13 +206,13 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load( ; GFX6-LABEL: local_wavefront_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, 
s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -272,12 +272,12 @@ define amdgpu_kernel void @local_wavefront_unordered_store( ; GFX6-LABEL: local_wavefront_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -327,12 +327,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_store( ; GFX6-LABEL: local_wavefront_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -382,12 +382,12 @@ define amdgpu_kernel void @local_wavefront_release_store( ; GFX6-LABEL: local_wavefront_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -437,12 +437,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store( ; GFX6-LABEL: local_wavefront_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -492,12 +492,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw( ; GFX6-LABEL: local_wavefront_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -547,12 +547,12 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw( ; GFX6-LABEL: local_wavefront_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -602,12 +602,12 @@ define amdgpu_kernel void 
@local_wavefront_release_atomicrmw( ; GFX6-LABEL: local_wavefront_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -657,12 +657,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw( ; GFX6-LABEL: local_wavefront_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -712,12 +712,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw( ; GFX6-LABEL: local_wavefront_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -767,12 +767,12 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw( ; GFX6-LABEL: local_wavefront_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, 
s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -833,12 +833,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_wavefront_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -899,12 +899,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_wavefront_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -965,8 +965,8 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: 
s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1027,8 +1027,8 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1089,8 +1089,8 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1151,8 +1151,8 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1213,8 +1213,8 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 
v0, s2 @@ -1275,8 +1275,8 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1337,8 +1337,8 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1399,8 +1399,8 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1461,8 +1461,8 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1523,8 +1523,8 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; 
GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1585,8 +1585,8 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1659,8 +1659,8 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1733,8 +1733,8 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1807,8 +1807,8 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1881,8 +1881,8 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1955,8 +1955,8 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2029,8 +2029,8 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2103,8 +2103,8 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2177,13 
+2177,13 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load( ; GFX6-LABEL: local_wavefront_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2243,13 +2243,13 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load( ; GFX6-LABEL: local_wavefront_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2309,13 +2309,13 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load( ; GFX6-LABEL: local_wavefront_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2375,13 
+2375,13 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2441,12 +2441,12 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store( ; GFX6-LABEL: local_wavefront_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2496,12 +2496,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store( ; GFX6-LABEL: local_wavefront_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2551,12 +2551,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store( ; GFX6-LABEL: local_wavefront_one_as_release_store: 
; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2606,12 +2606,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2661,12 +2661,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2716,12 +2716,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: 
s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2771,12 +2771,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2826,12 +2826,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2881,12 +2881,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: 
v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2936,12 +2936,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3002,12 +3002,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3068,12 +3068,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: 
v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3134,8 +3134,8 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3196,8 +3196,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3258,8 +3258,8 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3320,8 +3320,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3382,8 +3382,8 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3444,8 +3444,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3506,8 +3506,8 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3568,8 +3568,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3630,8 +3630,8 @@ define amdgpu_kernel void 
@local_wavefront_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3692,8 +3692,8 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3754,8 +3754,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3828,8 +3828,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3902,8 +3902,8 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: 
local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3976,8 +3976,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4050,8 +4050,8 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4124,8 +4124,8 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4198,8 +4198,8 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: 
s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4272,8 +4272,8 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,13 +8,13 @@ define amdgpu_kernel void @local_workgroup_unordered_load( ; GFX6-LABEL: local_workgroup_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: 
s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -74,13 +74,13 @@ define amdgpu_kernel void @local_workgroup_monotonic_load( ; GFX6-LABEL: local_workgroup_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -140,14 +140,14 @@ define amdgpu_kernel void @local_workgroup_acquire_load( ; GFX6-LABEL: local_workgroup_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm ; @@ -207,15 +207,15 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load( ; GFX6-LABEL: local_workgroup_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, 
-1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_read_b32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm ; @@ -280,12 +280,12 @@ define amdgpu_kernel void @local_workgroup_unordered_store( ; GFX6-LABEL: local_workgroup_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -335,12 +335,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_store( ; GFX6-LABEL: local_workgroup_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -390,12 +390,12 @@ define amdgpu_kernel void @local_workgroup_release_store( ; GFX6-LABEL: local_workgroup_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, 
s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm @@ -451,12 +451,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store( ; GFX6-LABEL: local_workgroup_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm @@ -512,12 +512,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw( ; GFX6-LABEL: local_workgroup_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -567,12 +567,12 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw( ; GFX6-LABEL: local_workgroup_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_endpgm @@ -629,12 +629,12 
@@ define amdgpu_kernel void @local_workgroup_release_atomicrmw( ; GFX6-LABEL: local_workgroup_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm @@ -690,12 +690,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw( ; GFX6-LABEL: local_workgroup_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -758,12 +758,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw( ; GFX6-LABEL: local_workgroup_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -826,12 +826,12 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw( ; 
GFX6-LABEL: local_workgroup_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -894,12 +894,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_workgroup_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -968,12 +968,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_workgroup_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -1042,8 +1042,8 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: 
local_workgroup_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1104,8 +1104,8 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1173,8 +1173,8 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1241,8 +1241,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1316,8 +1316,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; 
GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1391,8 +1391,8 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1460,8 +1460,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1535,8 +1535,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1610,8 +1610,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 
@@ -1685,8 +1685,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1760,8 +1760,8 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1835,8 +1835,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1916,8 +1916,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -1997,8 +1997,8 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: 
local_workgroup_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2072,8 +2072,8 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2153,8 +2153,8 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2234,8 +2234,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2315,8 +2315,8 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -2396,13 +2396,13 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load( ; GFX6-LABEL: local_workgroup_one_as_unordered_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2462,13 +2462,13 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load( ; GFX6-LABEL: local_workgroup_one_as_monotonic_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2528,13 +2528,13 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load( ; GFX6-LABEL: local_workgroup_one_as_acquire_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 
v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2594,13 +2594,13 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_load: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: ds_read_b32 v0, v0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v1, v0 ; GFX6-NEXT: s_endpgm @@ -2660,12 +2660,12 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store( ; GFX6-LABEL: local_workgroup_one_as_unordered_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2715,12 +2715,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store( ; GFX6-LABEL: local_workgroup_one_as_monotonic_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: 
v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2770,12 +2770,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store( ; GFX6-LABEL: local_workgroup_one_as_release_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2825,12 +2825,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_store: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s1 ; GFX6-NEXT: ds_write_b32 v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2880,12 +2880,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_monotonic_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2935,12 +2935,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw( ; 
GFX6-LABEL: local_workgroup_one_as_acquire_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -2990,12 +2990,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_release_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3045,12 +3045,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_acq_rel_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3100,12 +3100,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: 
s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 ; GFX6-NEXT: s_endpgm ; @@ -3155,12 +3155,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3221,12 +3221,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3287,12 +3287,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xa +; 
GFX6-NEXT: s_load_dword s0, s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s1, s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s2 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 ; GFX6-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: ds_write_b32 v0, v1 @@ -3353,8 +3353,8 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3415,8 +3415,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3477,8 +3477,8 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3539,8 +3539,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; 
%entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3601,8 +3601,8 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3663,8 +3663,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3725,8 +3725,8 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_release_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3787,8 +3787,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword 
s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3849,8 +3849,8 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3911,8 +3911,8 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -3973,8 +3973,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4047,8 +4047,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4121,8 +4121,8 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4195,8 +4195,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4269,8 +4269,8 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4343,8 +4343,8 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4417,8 +4417,8 @@ define 
amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 @@ -4491,8 +4491,8 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX6-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_load_dword s2, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xa +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x1 ; GFX6-NEXT: s_mov_b32 m0, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: v_mov_b32_e32 v0, s2 Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX6 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX7 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-WGP %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+cumode -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-CU %s @@ -8,18 +8,16 @@ define amdgpu_kernel void @private_nontemporal_load_0( ; GFX6-LABEL: 
private_nontemporal_load_0: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s10, -1 -; GFX6-NEXT: s_mov_b32 s11, 0xe8f000 -; GFX6-NEXT: s_add_u32 s8, s8, s3 +; GFX6-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX6-NEXT: s_mov_b64 s[8:9], s[0:1] +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_add_u32 s8, s8, s7 ; GFX6-NEXT: s_addc_u32 s9, s9, 0 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s6 ; GFX6-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen glc slc -; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -103,19 +101,17 @@ define amdgpu_kernel void @private_nontemporal_load_1( ; GFX6-LABEL: private_nontemporal_load_1: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_load_dword s4, s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb -; GFX6-NEXT: s_mov_b32 s10, -1 -; GFX6-NEXT: s_mov_b32 s11, 0xe8f000 -; GFX6-NEXT: s_add_u32 s8, s8, s3 +; GFX6-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX6-NEXT: s_mov_b64 s[8:9], s[0:1] +; GFX6-NEXT: s_load_dword s6, s[4:5], 0x0 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2 +; GFX6-NEXT: s_add_u32 s8, s8, s7 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX6-NEXT: s_addc_u32 s9, s9, 0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, s6, v0 ; GFX6-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen glc slc -; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s3, 0x100f000 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 @@ -203,20 +199,18 @@ define amdgpu_kernel void @private_nontemporal_store_0( ; GFX6-LABEL: private_nontemporal_store_0: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s7, 0xe8f000 -; GFX6-NEXT: s_add_u32 s4, s4, s3 -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb -; GFX6-NEXT: s_addc_u32 s5, s5, 0 +; GFX6-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX6-NEXT: s_mov_b64 s[8:9], s[0:1] +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2 +; GFX6-NEXT: s_add_u32 s8, s8, s7 +; GFX6-NEXT: s_addc_u32 s9, s9, 0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, s1 -; GFX6-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen glc slc +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen glc slc ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: private_nontemporal_store_0: @@ -296,21 +290,19 @@ define amdgpu_kernel void @private_nontemporal_store_1( ; GFX6-LABEL: private_nontemporal_store_1: ; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s7, 0xe8f000 -; GFX6-NEXT: s_add_u32 s4, s4, s3 -; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 -; GFX6-NEXT: s_load_dword s0, s[0:1], 0xb +; GFX6-NEXT: s_mov_b64 s[10:11], s[2:3] +; GFX6-NEXT: s_mov_b64 s[8:9], s[0:1] +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX6-NEXT: s_load_dword s2, s[4:5], 0x2 +; GFX6-NEXT: s_add_u32 s8, s8, s7 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6-NEXT: s_addc_u32 s5, s5, 0 +; GFX6-NEXT: s_addc_u32 s9, s9, 
0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s1, s[2:3], 0x0 -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: buffer_store_dword v1, v0, s[4:7], 0 offen glc slc +; GFX6-NEXT: v_mov_b32_e32 v1, s0 +; GFX6-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen glc slc ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: private_nontemporal_store_1: