Index: lib/Target/AMDGPU/AMDGPUInstructions.td
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructions.td
+++ lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -497,6 +497,7 @@
 defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
 defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
 defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
+defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;

 def store_hi16_private : StoreHi16 <truncstorei16>, PrivateAddress;
@@ -569,19 +570,6 @@
 defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
 }

-let AddressSpaces = StoreAddress_global.AddrSpaces in {
-def AMDGPUatomic_cmp_swap_global : PatFrag<
-  (ops node:$ptr, node:$value),
-  (AMDGPUatomic_cmp_swap node:$ptr, node:$value)>;
-
-defm atomic_cmp_swap_global : ternary_atomic_op<atomic_cmp_swap>;
-}
-
-class global_binary_atomic_op_frag<SDNode atomic_op> : PatFrag<
-  (ops node:$ptr, node:$value),
-  (atomic_op node:$ptr, node:$value),
-  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
-
 // Legacy.
 def atomic_cmp_swap_global_noret : PatFrag<
   (ops node:$ptr, node:$cmp, node:$value),
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2949,7 +2949,8 @@
   case AMDGPU::G_ATOMICRMW_UMAX:
   case AMDGPU::G_ATOMICRMW_UMIN:
   case AMDGPU::G_ATOMICRMW_FADD:
-  case AMDGPU::G_ATOMIC_CMPXCHG: {
+  case AMDGPU::G_ATOMIC_CMPXCHG:
+  case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
     return getDefaultMappingAllVGPR(MI);
   }
   case AMDGPU::G_BRCOND: {
Index: lib/Target/AMDGPU/FLATInstructions.td
===================================================================
--- lib/Target/AMDGPU/FLATInstructions.td
+++ lib/Target/AMDGPU/FLATInstructions.td
@@ -370,27 +370,6 @@
   FLAT_Global_Atomic_Pseudo_NO_RTN,
   FLAT_Global_Atomic_Pseudo_RTN;

-class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
-  (ops node:$ptr, node:$value),
-  (atomic_op node:$ptr, node:$value),
-  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
->;
-
-def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
-def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
-def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
-def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
-def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
-def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
-def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
-def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
-def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
-
-
 //===----------------------------------------------------------------------===//
 // Flat Instructions
 //===----------------------------------------------------------------------===//
@@ -425,84 +404,84 @@
 }

 defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
-  VGPR_32, i32, atomic_cmp_swap_flat,
+  VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
   v2i32, VReg_64>;

 defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
-  VReg_64, i64, atomic_cmp_swap_flat,
+  VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
   v2i64, VReg_128>;

 defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
-  VGPR_32, i32, atomic_swap_flat>;
+  VGPR_32, i32, atomic_swap_flat_32>;

 defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
-  VReg_64, i64, atomic_swap_flat>;
+  VReg_64, i64, atomic_swap_flat_64>;

 defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
-  VGPR_32, i32, atomic_add_flat>;
+  VGPR_32, i32, atomic_load_add_flat_32>;

 defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
-  VGPR_32, i32, atomic_sub_flat>;
+  VGPR_32, i32, atomic_load_sub_flat_32>;

 defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
-  VGPR_32, i32, atomic_min_flat>;
+  VGPR_32, i32, atomic_load_min_flat_32>;

 defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
-  VGPR_32, i32, atomic_umin_flat>;
+  VGPR_32, i32, atomic_load_umin_flat_32>;

 defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
-  VGPR_32, i32, atomic_max_flat>;
+  VGPR_32, i32, atomic_load_max_flat_32>;

 defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
-  VGPR_32, i32, atomic_umax_flat>;
+  VGPR_32, i32, atomic_load_umax_flat_32>;

 defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
-  VGPR_32, i32, atomic_and_flat>;
+  VGPR_32, i32, atomic_load_and_flat_32>;

 defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
-  VGPR_32, i32, atomic_or_flat>;
+  VGPR_32, i32, atomic_load_or_flat_32>;

 defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
-  VGPR_32, i32, atomic_xor_flat>;
+  VGPR_32, i32, atomic_load_xor_flat_32>;

 defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
-  VGPR_32, i32, atomic_inc_flat>;
+  VGPR_32, i32, atomic_inc_flat_32>;

 defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
-  VGPR_32, i32, atomic_dec_flat>;
+  VGPR_32, i32, atomic_dec_flat_32>;

 defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
-  VReg_64, i64, atomic_add_flat>;
+  VReg_64, i64, atomic_load_add_flat_64>;

 defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
-  VReg_64, i64, atomic_sub_flat>;
+  VReg_64, i64, atomic_load_sub_flat_64>;

 defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
-  VReg_64, i64, atomic_min_flat>;
+  VReg_64, i64, atomic_load_min_flat_64>;

 defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
-  VReg_64, i64, atomic_umin_flat>;
+  VReg_64, i64, atomic_load_umin_flat_64>;

 defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
-  VReg_64, i64, atomic_max_flat>;
+  VReg_64, i64, atomic_load_max_flat_64>;

 defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
-  VReg_64, i64, atomic_umax_flat>;
+  VReg_64, i64, atomic_load_umax_flat_64>;

 defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
-  VReg_64, i64, atomic_and_flat>;
+  VReg_64, i64, atomic_load_and_flat_64>;

 defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
-  VReg_64, i64, atomic_or_flat>;
+  VReg_64, i64, atomic_load_or_flat_64>;

 defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
-  VReg_64, i64, atomic_xor_flat>;
+  VReg_64, i64, atomic_load_xor_flat_64>;

 defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
-  VReg_64, i64, atomic_inc_flat>;
+  VReg_64, i64, atomic_inc_flat_64>;

 defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
-  VReg_64, i64, atomic_dec_flat>;
+  VReg_64, i64, atomic_dec_flat_64>;

 // GFX7-, GFX10-only flat instructions.
 let SubtargetPredicate = isGFX7GFX10 in {
@@ -556,11 +535,11 @@
 let is_flat_global = 1 in {
 defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
-  VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
+  VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
   v2i32, VReg_64>;

 defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
-  VReg_64, i64, AMDGPUatomic_cmp_swap_global,
+  VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
   v2i64, VReg_128>;

 defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -813,7 +792,7 @@
 def : FlatAtomicPat ;
 def : FlatAtomicPat ;
 def : FlatAtomicPat ;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_flat, i32, v2i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_flat_32, i32, v2i32>;
 def : FlatAtomicPat ;
 def : FlatAtomicPat ;
@@ -827,7 +806,7 @@
 def : FlatAtomicPat ;
 def : FlatAtomicPat ;
 def : FlatAtomicPat ;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_flat, i64, v2i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_flat_64, i64, v2i64>;
 def : FlatAtomicPat ;

 def : FlatStorePat ;
@@ -923,7 +902,7 @@
 def : FlatSignedAtomicPat ;
 def : FlatSignedAtomicPat ;
 def : FlatSignedAtomicPat ;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
 def : FlatSignedAtomicPat ;
 def : FlatSignedAtomicPat ;
@@ -937,7 +916,7 @@
 def : FlatSignedAtomicPat ;
 def : FlatSignedAtomicPat ;
 def : FlatSignedAtomicPat ;
-def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
 def : FlatSignedAtomicPat ;

 def : FlatAtomicPatNoRtn ;
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir
@@ -1,9 +1,9 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s
-# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
-# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s

 ---
 name: amdgpu_atomic_cmpxchg_s32_global
@@ -14,6 +14,30 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3

+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vgpr_32(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1)
+    ; GFX6: $vgpr0 = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s32) = COPY $vgpr3
@@ -32,6 +56,42 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3

+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4
+    ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vgpr_32(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[GEP]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1)
+    ; GFX6: $vgpr0 = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gep4
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s32) = COPY $vgpr3
@@ -52,6 +112,30 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5

+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s64>) = G_BUILD_VECTOR [[COPY1]](s64), [[COPY2]](s64)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vreg_64(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 8, addrspace 1)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s64)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     %2:vgpr(s64) = COPY $vgpr4_vgpr5
@@ -70,6 +154,42 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5

+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s64>) = G_BUILD_VECTOR [[COPY1]](s64), [[COPY2]](s64)
+    ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 4
+    ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vreg_64(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[GEP]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 8, addrspace 1)
+    ; GFX6: $vgpr0_vgpr1 = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s64)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+    ; GFX7: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_gep4
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+    ; GFX9: $vgpr0_vgpr1 = COPY [[FLAT_ATOMIC_CMPSWAP_X2_RTN]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     %2:vgpr(s64) = COPY $vgpr4_vgpr5
@@ -90,6 +210,52 @@
   bb.0:
     liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3

+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX6: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 -4
+    ; GFX6: [[GEP:%[0-9]+]]:vgpr(p1) = G_GEP [[COPY]], [[C]](s64)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vgpr_32(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[GEP]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1)
+    ; GFX6: $vgpr0 = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+    ; GFX7: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+    ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+    ; GFX7: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX7: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX7: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_gepm4
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+    ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0
+    ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1
+    ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec
+    ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+    ; GFX9: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %12, %subreg.sub1
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX9: $vgpr0 = COPY [[FLAT_ATOMIC_CMPSWAP_RTN]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(s32) = COPY $vgpr2
     %2:vgpr(s32) = COPY $vgpr3
@@ -100,3 +266,79 @@
     $vgpr0 = COPY %6

 ...
+
+---
+name: amdgpu_atomic_cmpxchg_s32_global_nortn
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+
+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 4, addrspace 1)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 4, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s32) = COPY $vgpr2
+    %2:vgpr(s32) = COPY $vgpr3
+    %3:vgpr(<2 x s32>) = G_BUILD_VECTOR %1, %2
+    %4:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 4, addrspace 1)
+
+...
+
+---
+name: amdgpu_atomic_cmpxchg_s64_global_nortn
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+    ; GFX6-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
+    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+    ; GFX6: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY $vgpr4_vgpr5
+    ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<2 x s64>) = G_BUILD_VECTOR [[COPY1]](s64), [[COPY2]](s64)
+    ; GFX6: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store seq_cst 8, addrspace 1)
+    ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
+    ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX7: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX7: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+    ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+    ; GFX7: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+    ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn
+    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5
+    ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3
+    ; GFX9: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst 8, addrspace 1)
+    %0:vgpr(p1) = COPY $vgpr0_vgpr1
+    %1:vgpr(s64) = COPY $vgpr2_vgpr3
+    %2:vgpr(s64) = COPY $vgpr4_vgpr5
+    %3:vgpr(<2 x s64>) = G_BUILD_VECTOR %1, %2
+    %4:vgpr(s64) = G_AMDGPU_ATOMIC_CMPXCHG %0, %3 :: (load store seq_cst 8, addrspace 1)
+
+...
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir
@@ -11,9 +11,10 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4, addrspace 1)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+    ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = COPY $vgpr3
@@ -32,9 +33,10 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p0), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 4)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
+    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 4)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
+    ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
     %0:_(p0) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s32) = COPY $vgpr3
@@ -74,9 +76,10 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
     ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
-    ; CHECK: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p1), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1)
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
-    ; CHECK: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
+    ; CHECK: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic 8, addrspace 1)
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
+    ; CHECK: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s64) = COPY $vgpr2_vgpr3
     %2:_(s64) = COPY $vgpr4_vgpr5