Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -138,6 +138,10 @@ bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; + bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr, + SDValue &Offset, SDValue &SLC) const; + + template <bool IsSigned> bool SelectFlatOffset(SDValue Addr, SDValue &VAddr, SDValue &Offset, SDValue &SLC) const; @@ -1278,6 +1282,7 @@ return true; } +template <bool IsSigned> bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr, SDValue &VAddr, SDValue &Offset, @@ -1288,8 +1293,10 @@ CurDAG->isBaseWithConstantOffset(Addr)) { SDValue N0 = Addr.getOperand(0); SDValue N1 = Addr.getOperand(1); - uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getZExtValue(); - if (isUInt<12>(COffsetVal)) { + int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue(); + + if ((IsSigned && isInt<13>(COffsetVal)) || + (!IsSigned && isUInt<12>(COffsetVal))) { Addr = N0; OffsetVal = COffsetVal; } @@ -1306,7 +1313,14 @@ SDValue &VAddr, SDValue &Offset, SDValue &SLC) const { - return SelectFlatOffset(Addr, VAddr, Offset, SLC); + return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC); } + +bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr, + SDValue &VAddr, + SDValue &Offset, + SDValue &SLC) const { + return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC); } bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -247,6 +247,7 @@ >; def global_load : GlobalLoad <load>; +def global_atomic_load : GlobalLoad<atomic_load>; // Global address space stores class GlobalStore <SDPatternOperator op> : GlobalMemOp < Index: lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- lib/Target/AMDGPU/FLATInstructions.td +++ 
lib/Target/AMDGPU/FLATInstructions.td @@ -8,7 +8,10 @@ //===----------------------------------------------------------------------===// def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [], -10>; -def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset", [], [], -10>; +def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [], -10>; + +def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [], -10>; +def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [], -10>; //===----------------------------------------------------------------------===// // FLAT classes @@ -151,9 +154,9 @@ RegisterClass vdst_rc, ValueType vt, SDPatternOperator atomic = null_frag, + bit HasSignedOffset = 0, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc, - bit HasSignedOffset = 0> { + RegisterClass data_rc = vdst_rc> { def "" : FLAT_Pseudo , + (atomic !if(HasSignedOffset, + (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), + (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)), + data_vt:$vdata))]>, AtomicNoRet { let mayLoad = 1; let mayStore = 1; @@ -232,11 +238,11 @@ def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", - VGPR_32, i32, atomic_cmp_swap_flat, + VGPR_32, i32, atomic_cmp_swap_flat, 0, v2i32, VReg_64>; defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", - VReg_64, i64, atomic_cmp_swap_flat, + VReg_64, i64, atomic_cmp_swap_flat, 0, v2i64, VReg_128>; defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", @@ -314,10 +320,10 @@ let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only? 
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", - VGPR_32, f32, null_frag, v2f32, VReg_64>; + VGPR_32, f32, null_frag, 0, v2f32, VReg_64>; defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", - VReg_64, f64, null_frag, v2f64, VReg_128>; + VReg_64, f64, null_frag, 0, v2f64, VReg_128>; defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", VGPR_32, f32>; @@ -350,6 +356,89 @@ def GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; def GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; + +let is_flat_global = 1 in { +defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"global_atomic_cmpswap", + VGPR_32, i32, AMDGPUatomic_cmp_swap_global, 1, + v2i32, VReg_64>; + +defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"global_atomic_cmpswap_x2", + VReg_64, i64, AMDGPUatomic_cmp_swap_global, 1, + v2i64, VReg_128>; + +defm GLOBAL_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"global_atomic_swap", + VGPR_32, i32, atomic_swap_global, 1>; + +defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"global_atomic_swap_x2", + VReg_64, i64, atomic_swap_global, 1>; + +defm GLOBAL_ATOMIC_ADD : FLAT_Atomic_Pseudo <"global_atomic_add", + VGPR_32, i32, atomic_add_global, 1>; + +defm GLOBAL_ATOMIC_SUB : FLAT_Atomic_Pseudo <"global_atomic_sub", + VGPR_32, i32, atomic_sub_global, 1>; + +defm GLOBAL_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"global_atomic_smin", + VGPR_32, i32, atomic_min_global, 1>; + +defm GLOBAL_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"global_atomic_umin", + VGPR_32, i32, atomic_umin_global, 1>; + +defm GLOBAL_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"global_atomic_smax", + VGPR_32, i32, atomic_max_global, 1>; + +defm GLOBAL_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"global_atomic_umax", + VGPR_32, i32, atomic_umax_global, 1>; + +defm GLOBAL_ATOMIC_AND : FLAT_Atomic_Pseudo <"global_atomic_and", + VGPR_32, i32, atomic_and_global, 1>; + +defm GLOBAL_ATOMIC_OR : FLAT_Atomic_Pseudo <"global_atomic_or", + 
VGPR_32, i32, atomic_or_global, 1>; + +defm GLOBAL_ATOMIC_XOR : FLAT_Atomic_Pseudo <"global_atomic_xor", + VGPR_32, i32, atomic_xor_global, 1>; + +defm GLOBAL_ATOMIC_INC : FLAT_Atomic_Pseudo <"global_atomic_inc", + VGPR_32, i32, atomic_inc_global, 1>; + +defm GLOBAL_ATOMIC_DEC : FLAT_Atomic_Pseudo <"global_atomic_dec", + VGPR_32, i32, atomic_dec_global, 1>; + +defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"global_atomic_add_x2", + VReg_64, i64, atomic_add_global, 1>; + +defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"global_atomic_sub_x2", + VReg_64, i64, atomic_sub_global, 1>; + +defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"global_atomic_smin_x2", + VReg_64, i64, atomic_min_global, 1>; + +defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"global_atomic_umin_x2", + VReg_64, i64, atomic_umin_global, 1>; + +defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"global_atomic_smax_x2", + VReg_64, i64, atomic_max_global, 1>; + +defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"global_atomic_umax_x2", + VReg_64, i64, atomic_umax_global, 1>; + +defm GLOBAL_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"global_atomic_and_x2", + VReg_64, i64, atomic_and_global, 1>; + +defm GLOBAL_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"global_atomic_or_x2", + VReg_64, i64, atomic_or_global, 1>; + +defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"global_atomic_xor_x2", + VReg_64, i64, atomic_xor_global, 1>; + +defm GLOBAL_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"global_atomic_inc_x2", + VReg_64, i64, atomic_inc_global, 1>; + +defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"global_atomic_dec_x2", + VReg_64, i64, atomic_dec_global, 1>; +} // End is_flat_global = 1 + } // End SubtargetPredicate = HasFlatGlobalInsts @@ -386,7 +475,7 @@ // Patterns for global loads with no offset. 
class FlatLoadPat : Pat < - (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), + (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; @@ -395,8 +484,18 @@ (inst $vaddr, $offset, 1, $slc) >; +class FlatLoadSignedPat : Pat < + (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), + (inst $vaddr, $offset, 0, $slc) +>; + class FlatStorePat : Pat < - (node vt:$data, (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc)), + (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)), + (inst $vaddr, $data, $offset, 0, $slc) +>; + +class FlatStoreSignedPat : Pat < + (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)), (inst $vaddr, $data, $offset, 0, $slc) >; @@ -407,12 +506,25 @@ (inst $vaddr, $data, $offset, 1, $slc) >; +class FlatStoreSignedAtomicPat : Pat < + // atomic store follows atomic binop convention so the address comes + // first. + (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), + (inst $vaddr, $data, $offset, 1, $slc) +>; + class FlatAtomicPat : Pat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), (inst $vaddr, $data, $offset, $slc) >; +class FlatSignedAtomicPat : Pat < + (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), + (inst $vaddr, $data, $offset, $slc) +>; + let Predicates = [isCIVI] in { def : FlatLoadPat ; @@ -473,6 +585,65 @@ } +let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { + +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; + + +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; +def : FlatLoadSignedPat ; + +def : FlatLoadAtomicPat ; +def : FlatLoadAtomicPat ; + +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; +def : FlatStoreSignedPat ; + +def : 
FlatStoreSignedAtomicPat ; +def : FlatStoreSignedAtomicPat ; + +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; + +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; +def : FlatSignedAtomicPat ; + +} // End Predicates = [HasFlatGlobalInsts] + + //===----------------------------------------------------------------------===// // Target //===----------------------------------------------------------------------===// @@ -619,3 +790,30 @@ def GLOBAL_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, GLOBAL_STORE_DWORDX2>; def GLOBAL_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, GLOBAL_STORE_DWORDX4>; def GLOBAL_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, GLOBAL_STORE_DWORDX3>; + +defm GLOBAL_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, GLOBAL_ATOMIC_SWAP>; +defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, GLOBAL_ATOMIC_CMPSWAP>; +defm GLOBAL_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, GLOBAL_ATOMIC_ADD>; +defm GLOBAL_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, GLOBAL_ATOMIC_SUB>; +defm GLOBAL_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, GLOBAL_ATOMIC_SMIN>; +defm GLOBAL_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, GLOBAL_ATOMIC_UMIN>; +defm GLOBAL_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, GLOBAL_ATOMIC_SMAX>; +defm GLOBAL_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, GLOBAL_ATOMIC_UMAX>; +defm GLOBAL_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, GLOBAL_ATOMIC_AND>; +defm GLOBAL_ATOMIC_OR : 
FLAT_Real_Atomics_vi <0x49, GLOBAL_ATOMIC_OR>; +defm GLOBAL_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, GLOBAL_ATOMIC_XOR>; +defm GLOBAL_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, GLOBAL_ATOMIC_INC>; +defm GLOBAL_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, GLOBAL_ATOMIC_DEC>; +defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, GLOBAL_ATOMIC_SWAP_X2>; +defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, GLOBAL_ATOMIC_CMPSWAP_X2>; +defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, GLOBAL_ATOMIC_ADD_X2>; +defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, GLOBAL_ATOMIC_SUB_X2>; +defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, GLOBAL_ATOMIC_SMIN_X2>; +defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, GLOBAL_ATOMIC_UMIN_X2>; +defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, GLOBAL_ATOMIC_SMAX_X2>; +defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, GLOBAL_ATOMIC_UMAX_X2>; +defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, GLOBAL_ATOMIC_AND_X2>; +defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, GLOBAL_ATOMIC_OR_X2>; +defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, GLOBAL_ATOMIC_XOR_X2>; +defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, GLOBAL_ATOMIC_INC_X2>; +defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, GLOBAL_ATOMIC_DEC_X2>; Index: test/CodeGen/AMDGPU/add.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/add.v2i16.ll +++ test/CodeGen/AMDGPU/add.v2i16.ll @@ -153,8 +153,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i32: -; GFX9: flat_load_dword [[A:v[0-9]+]] -; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: global_load_dword [[A:v[0-9]+]] +; GFX9: global_load_dword [[B:v[0-9]+]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] @@ -188,8 +188,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i64: -; GFX9: flat_load_dword [[A:v[0-9]+]] -; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: global_load_dword [[A:v[0-9]+]] +; GFX9: global_load_dword [[B:v[0-9]+]] ; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]] @@ -223,8 +223,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i32: -; GFX9: flat_load_dword [[A:v[0-9]+]] -; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: global_load_dword [[A:v[0-9]+]] +; GFX9: global_load_dword [[B:v[0-9]+]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16 @@ -251,8 +251,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_add_v2i16_sext_to_v2i64: -; GCN: flat_load_dword -; GCN: flat_load_dword +; GCN: {{flat|global}}_load_dword +; GCN: {{flat|global}}_load_dword ; GFX9: v_pk_add_u16 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} Index: test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- test/CodeGen/AMDGPU/addrspacecast.ll +++ test/CodeGen/AMDGPU/addrspacecast.ll @@ -140,7 +140,7 @@ ; HSA-DAG: v_mov_b32_e32 v[[VPTRLO:[0-9]+]], s[[PTRLO]] ; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]] ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0 -; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]] +; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]] define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 { %ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)* store volatile i32 0, i32 addrspace(1)* %ftos @@ -169,7 +169,7 @@ ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} -; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)* store volatile i32 7, i32 addrspace(4)* %cast @@ -190,7 +190,7 @@ ; HSA: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} -; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 { %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)* store volatile i32 7, i32 addrspace(4)* %cast @@ -215,7 +215,7 @@ ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} 
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} -; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] +; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 { %cast = addrspacecast i32* null to i32 addrspace(4)* store volatile i32 7, i32 addrspace(4)* %cast @@ -235,7 +235,7 @@ ; specialize away generic pointer accesses. ; HSA-LABEL: {{^}}branch_use_flat_i32: -; HSA: flat_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} +; HSA: {{flat|global}}_store_dword {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} ; HSA: s_endpgm define amdgpu_kernel void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { entry: @@ -267,9 +267,9 @@ ; GFX9: s_add_u32 flat_scratch_lo, s6, s9 ; GFX9: s_addc_u32 flat_scratch_hi, s7, 0 -; HSA: flat_store_dword +; HSA: {{flat|global}}_store_dword ; HSA: s_barrier -; HSA: flat_load_dword +; HSA: {{flat|global}}_load_dword define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { %alloca = alloca i32, i32 9, align 4 %x = call i32 @llvm.amdgcn.workitem.id.x() #2 Index: test/CodeGen/AMDGPU/ashr.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/ashr.v2i16.ll +++ test/CodeGen/AMDGPU/ashr.v2i16.ll @@ -23,8 +23,8 @@ } ; GCN-LABEL: {{^}}v_ashr_v2i16: -; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] ; VI: v_ashrrev_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -56,7 +56,7 @@ ; GCN-LABEL: {{^}}ashr_v_s_v2i16: ; GFX9: s_load_dword [[RHS:s[0-9]+]] -; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] +; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], 
[[LHS]] define amdgpu_kernel void @ashr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -71,7 +71,7 @@ ; GCN-LABEL: {{^}}ashr_s_v_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] -; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] define amdgpu_kernel void @ashr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -85,7 +85,7 @@ } ; GCN-LABEL: {{^}}ashr_imm_v_v2i16: -; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], -4 define amdgpu_kernel void @ashr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -99,7 +99,7 @@ } ; GCN-LABEL: {{^}}ashr_v_imm_v2i16: -; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] ; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], 8, [[LHS]] define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -113,8 +113,8 @@ } ; GCN-LABEL: {{^}}v_ashr_v4i16: -; GCN: {{buffer|flat}}_load_dwordx2 -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -125,7 +125,7 @@ ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GCN: {{buffer|flat}}_store_dwordx2 +; GCN: {{buffer|flat|global}}_store_dwordx2 define amdgpu_kernel void @v_ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> 
addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -140,10 +140,10 @@ } ; GCN-LABEL: {{^}}ashr_v_imm_v4i16: -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, 8, v{{[0-9]+}} -; GCN: {{buffer|flat}}_store_dwordx2 +; GCN: {{buffer|flat|global}}_store_dwordx2 define amdgpu_kernel void @ashr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 Index: test/CodeGen/AMDGPU/extract_vector_elt-i16.ll =================================================================== --- test/CodeGen/AMDGPU/extract_vector_elt-i16.ll +++ test/CodeGen/AMDGPU/extract_vector_elt-i16.ll @@ -36,7 +36,7 @@ ; GCN-LABEL: {{^}}extract_vector_elt_v2i16_dynamic_vgpr: ; GCN-DAG: s_load_dword [[VEC:s[0-9]+]] -; GCN-DAG: {{flat|buffer}}_load_dword [[IDX:v[0-9]+]] +; GCN-DAG: {{flat|buffer|global}}_load_dword [[IDX:v[0-9]+]] ; GCN: v_lshlrev_b32_e32 [[IDX_SCALED:v[0-9]+]], 16, [[IDX]] ; SI: v_lshr_b32_e32 [[ELT:v[0-9]+]], [[VEC]], [[IDX_SCALED]] Index: test/CodeGen/AMDGPU/fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fabs.f16.ll +++ test/CodeGen/AMDGPU/fabs.f16.ll @@ -9,7 +9,7 @@ ; GCN-LABEL: {{^}}s_fabs_free_f16: ; GCN: flat_load_ushort [[VAL:v[0-9]+]], ; GCN: v_and_b32_e32 [[RESULT:v[0-9]+]], 0x7fff, [[VAL]] -; GCN: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @s_fabs_free_f16(half addrspace(1)* %out, i16 %in) { %bc= bitcast i16 %in to half @@ -67,7 +67,7 @@ ; VI-DAG: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GCN: flat_store_dwordx2 +; GCN: {{flat|global}}_store_dwordx2 define amdgpu_kernel void 
@s_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) { %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in) store <4 x half> %fabs, <4 x half> addrspace(1)* %out @@ -95,7 +95,7 @@ } ; GCN-LABEL: {{^}}v_fabs_v2f16: -; GCN: flat_load_dword [[VAL:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]] ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7fff7fff, [[VAL]] define amdgpu_kernel void @v_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -118,7 +118,7 @@ } ; GCN-LABEL: {{^}}v_fabs_fold_v2f16: -; GCN: flat_load_dword [[VAL:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]] ; CI: v_cvt_f32_f16_e32 ; CI: v_cvt_f32_f16_e32 Index: test/CodeGen/AMDGPU/fmed3.ll =================================================================== --- test/CodeGen/AMDGPU/fmed3.ll +++ test/CodeGen/AMDGPU/fmed3.ll @@ -165,9 +165,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer_|flat_|global_}}load_dword [[A:v[0-9]+]] +; GCN: {{buffer_|flat_|global_}}load_dword [[B:v[0-9]+]] +; GCN: {{buffer_|flat_|global_}}load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -188,9 +188,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod1: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 
v{{[0-9]+}}, [[A]], -[[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -211,9 +211,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod2: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], -[[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -234,9 +234,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod012: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, -[[A]], |[[B]]|, -|[[C]]| define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -263,9 +263,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_negabs012: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: 
{{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, -|[[A]]|, -|[[B]]|, -|[[C]]| define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -294,9 +294,9 @@ } ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f32_pat0: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN-DAG: v_add_f32_e32 [[A_ADD:v[0-9]+]], 1.0, [[A]] ; GCN-DAG: v_add_f32_e32 [[B_ADD:v[0-9]+]], 2.0, [[B]] ; GCN-DAG: v_add_f32_e32 [[C_ADD:v[0-9]+]], 4.0, [[C]] @@ -337,9 +337,9 @@ ; + commute outermost max ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -359,9 +359,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat1: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, 
[[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -381,9 +381,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat2: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -403,9 +403,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat3: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -425,9 +425,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat4: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], 
[[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -447,9 +447,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat5: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -469,9 +469,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat6: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -491,9 +491,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat7: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define 
amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -513,9 +513,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat8: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -535,9 +535,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat9: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -557,9 +557,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat10: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]] define 
amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -579,9 +579,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat11: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -601,9 +601,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat12: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -623,9 +623,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat13: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define 
amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -645,9 +645,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat14: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[B]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -667,9 +667,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat15: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_med3_f32 v{{[0-9]+}}, [[B]], [[A]], [[C]] define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -842,9 +842,9 @@ } ; GCN-LABEL: {{^}}v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_min_f32 ; GCN: v_max_f32 ; GCN: 
v_min_f32 @@ -869,9 +869,9 @@ ; A simple min and max is not sufficient ; GCN-LABEL: {{^}}v_test_global_nnans_min_max_f32: -; GCN: {{buffer_|flat_}}load_dword [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_dword [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[C:v[0-9]+]] ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], [[B]], [[A]] ; GCN: v_min_f32_e32 v{{[0-9]+}}, [[C]], [[MAX]] define amdgpu_kernel void @v_test_global_nnans_min_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { @@ -915,9 +915,9 @@ } ; GCN-LABEL: {{^}}v_nnan_inputs_med3_f16_pat0: -; GCN: {{buffer_|flat_}}load_ushort [[A:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_ushort [[B:v[0-9]+]] -; GCN: {{buffer_|flat_}}load_ushort [[C:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_ushort [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_ushort [[C:v[0-9]+]] ; SI: v_cvt_f32_f16 ; SI: v_cvt_f32_f16 Index: test/CodeGen/AMDGPU/fmuladd.v2f16.ll =================================================================== --- test/CodeGen/AMDGPU/fmuladd.v2f16.ll +++ test/CodeGen/AMDGPU/fmuladd.v2f16.ll @@ -28,15 +28,15 @@ } ; GCN-LABEL: {{^}}fmuladd_2.0_a_b_v2f16: -; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]], -; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]], +; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]], +; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]], ; GFX9-FLUSH: v_pk_add_f16 [[ADD0:v[0-9]+]], [[R1]], [[R1]] ; GFX9-FLUSH: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]] -; GFX9-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GFX9-FLUSH: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] -; GFX9-DENORM: flat_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GFX9-DENORM: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @fmuladd_2.0_a_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid @@ -52,15 +52,15 @@ } ; GCN-LABEL: {{^}}fmuladd_a_2.0_b_v2f16: -; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]], -; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]], +; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]], +; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]], ; GFX9-FLUSH: v_pk_add_f16 [[ADD0:v[0-9]+]], [[R1]], [[R1]] ; GFX9-FLUSH: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]] -; GFX9-FLUSH: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GFX9-FLUSH: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX9-DENORM: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] -; GFX9-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GFX9-DENORM: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @fmuladd_a_2.0_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid @@ -76,8 +76,8 @@ } ; GCN-LABEL: {{^}}fadd_a_a_b_v2f16: -; GCN: {{buffer|flat}}_load_dword [[R1:v[0-9]+]], -; GCN: {{buffer|flat}}_load_dword [[R2:v[0-9]+]], +; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]], +; GCN: {{buffer|flat|global}}_load_dword [[R2:v[0-9]+]], ; GFX9-FLUSH: v_pk_add_f16 [[ADD0:v[0-9]+]], [[R1]], [[R1]] ; GFX9-FLUSH: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]] @@ -85,7 +85,7 @@ ; GFX9-DENORM-STRICT: v_pk_add_f16 [[RESULT:v[0-9]+]], [[ADD0]], [[R2]] ; GFX9-DENORM-CONTRACT: v_pk_fma_f16 [[RESULT:v[0-9]+]], [[R1]], 2.0, [[R2]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: {{flat|global}}_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @fadd_a_a_b_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1, <2 x half> addrspace(1)* %in2) #0 { Index: test/CodeGen/AMDGPU/fneg-fabs.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -26,7 +26,7 @@ ; GFX89-NOT: _and ; GFX89: v_mul_f16_e64 [[MUL:v[0-9]+]], {{v[0-9]+}}, -|{{v[0-9]+}}| ; GFX89-NOT: [[MUL]] -; GFX89: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] +; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[MUL]] define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x, half %y) { %fabs = call half @llvm.fabs.f16(half %x) %fsub = fsub half -0.0, %fabs @@ -94,7 +94,7 @@ ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}} ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}} -; GCN: flat_store_dwordx2 +; GCN: {{flat|global}}_store_dwordx2 define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) { %fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in) %fsub = fsub <4 x half> , %fabs Index: test/CodeGen/AMDGPU/fneg.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fneg.f16.ll +++ test/CodeGen/AMDGPU/fneg.f16.ll @@ -14,7 +14,7 @@ ; well. 
; GCN-LABEL: {{^}}v_fneg_f16: -; GCN: flat_load_ushort [[VAL:v[0-9]+]], +; GCN: {{flat|global}}_load_ushort [[VAL:v[0-9]+]], ; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[VAL]] ; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[XOR]] ; SI: buffer_store_short [[XOR]] @@ -29,7 +29,7 @@ } ; GCN-LABEL: {{^}}fneg_free_f16: -; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]], +; GCN: {{flat|global}}_load_ushort [[NEG_VALUE:v[0-9]+]], ; XCI: s_xor_b32 [[XOR:s[0-9]+]], [[NEG_VALUE]], 0x8000{{$}} ; CI: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x8000, [[NEG_VALUE]] @@ -42,7 +42,7 @@ } ; GCN-LABEL: {{^}}v_fneg_fold_f16: -; GCN: flat_load_ushort [[NEG_VALUE:v[0-9]+]] +; GCN: {{flat|global}}_load_ushort [[NEG_VALUE:v[0-9]+]] ; CI-DAG: v_cvt_f32_f16_e32 [[CVT_VAL:v[0-9]+]], [[NEG_VALUE]] ; CI-DAG: v_cvt_f32_f16_e64 [[NEG_CVT0:v[0-9]+]], -[[NEG_VALUE]] @@ -81,7 +81,7 @@ } ; GCN-LABEL: {{^}}v_fneg_v2f16: -; GCN: flat_load_dword [[VAL:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]] ; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80008000, [[VAL]] define amdgpu_kernel void @v_fneg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -107,7 +107,7 @@ } ; GCN-LABEL: {{^}}v_fneg_fold_v2f16: -; GCN: flat_load_dword [[VAL:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]] ; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}} ; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, -v{{[0-9]+}} Index: test/CodeGen/AMDGPU/global_atomics.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics.ll +++ test/CodeGen/AMDGPU/global_atomics.ll @@ -1,8 +1,10 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck 
-enable-var-scope -check-prefixes=GCN,SI,SIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s -; FUNC-LABEL: {{^}}atomic_add_i32_offset: -; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_add_i32_offset: +; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -10,9 +12,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_soffset: -; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 -; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} +; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset: +; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:-4096{{$}} +define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) { +entry: + %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024 + %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + ret void +} + +; GCN-LABEL: {{^}}atomic_add_i32_soffset: +; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0 +; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} + +; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000 @@ -20,11 +33,14 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_huge_offset: +; GCN-LABEL: {{^}}atomic_add_i32_huge_offset: ; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac ; SI-DAG: v_mov_b32_e32 
v[[PTRHI:[0-9]+]], 0xabcd ; SI: buffer_atomic_add v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} + ; VI: flat_atomic_add + +; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595 @@ -33,9 +49,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset: -; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_add_i32_ret_offset: +; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -44,7 +62,7 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { @@ -55,10 +73,13 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} +; GFX9: global_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -68,17 +89,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32: -; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_add_i32: +; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_add v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_ret: -; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_add_i32_ret: +; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst @@ -86,7 +111,7 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_addr64: +; GCN-LABEL: {{^}}atomic_add_i32_addr64: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { @@ -96,10 +121,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_add 
[[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -108,8 +135,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_and_i32_offset: -; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_and_i32_offset: +; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -117,9 +146,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_and_i32_ret_offset: +; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} + define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -128,9 +160,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + +; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void 
@atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -139,10 +173,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -152,17 +188,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_and_i32: -; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_and_i32: +; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} + +; GFX9: global_atomic_and v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_and_i32_ret: -; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_and_i32_ret: +; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst @@ -170,9 +210,11 @@ ret void } -; FUNC-LABEL: 
{{^}}atomic_and_i32_addr64: +; GCN-LABEL: {{^}}atomic_and_i32_addr64: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + +; GFX9: global_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -180,10 +222,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -192,8 +236,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_offset: -; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_sub_i32_offset: +; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -201,9 +247,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset: +; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} 
+; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_sub v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -212,9 +260,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + +; GFX9: global_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -223,10 +273,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -236,17 +288,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32: -; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_sub_i32: +; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} + +; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw 
volatile sub i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_ret: -; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_sub_i32_ret: +; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst @@ -254,9 +310,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_addr64: +; GCN-LABEL: {{^}}atomic_sub_i32_addr64: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + +; GFX9: global_atomic_sub v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -264,10 +322,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -276,8 +336,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_offset: -; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; 
GCN-LABEL: {{^}}atomic_max_i32_offset: +; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -285,9 +347,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_max_i32_ret_offset: +; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -296,9 +360,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + +; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -307,10 +373,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: 
global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -320,17 +388,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32: -; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_max_i32: +; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} + +; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_ret: -; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_max_i32_ret: +; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst @@ -338,9 +410,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_addr64: +; GCN-LABEL: {{^}}atomic_max_i32_addr64: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} + +; GFX9: global_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -348,10 +422,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64: 
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -360,8 +436,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_offset: -; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_umax_i32_offset: +; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -369,9 +447,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset: +; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -380,9 +460,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: 
flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -391,10 +472,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -404,17 +487,22 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32: -; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_umax_i32: +; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} + +; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_ret: -; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_umax_i32_ret: +; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + + +; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 
addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst @@ -422,9 +510,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_addr64: +; GCN-LABEL: {{^}}atomic_umax_i32_addr64: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -432,10 +521,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -444,8 +535,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_offset: -; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_min_i32_offset: +; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -453,9 +546,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: 
buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_min_i32_ret_offset: +; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -464,9 +559,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -475,10 +571,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -488,17 +586,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32: -; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_min_i32: +; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 
0{{$}} + +; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_ret: -; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_min_i32_ret: +; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst @@ -506,9 +608,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_addr64: +; GCN-LABEL: {{^}}atomic_min_i32_addr64: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -516,10 +619,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 
%index @@ -528,8 +633,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_offset: -; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_umin_i32_offset: +; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -537,9 +644,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; GCN: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset: +; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -548,9 +657,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -559,10 +669,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 
offset:16 glc{{$}} ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -572,17 +684,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32: -; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_umin_i32: +; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_ret: -; SI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_umin_i32_ret: +; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst @@ -590,9 +705,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_addr64: +; GCN-LABEL: {{^}}atomic_umin_i32_addr64: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) 
{ entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -600,10 +716,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -612,8 +730,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_offset: -; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_or_i32_offset: +; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -621,9 +741,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_or_i32_ret_offset: +; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -632,9 +754,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: +; 
GCN-LABEL: {{^}}atomic_or_i32_addr64_offset: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -643,10 +766,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -656,17 +781,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32: -; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_or_i32: +; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} + +; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_ret: -; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_or_i32_ret: +; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_or [[RET:v[0-9]+]], 
v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst @@ -674,9 +803,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_addr64: +; GCN-LABEL: {{^}}atomic_or_i32_addr64: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -684,10 +814,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -696,8 +828,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: -; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_xchg_i32_offset: +; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -705,9 +839,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; GCN: 
buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset: +; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -716,10 +852,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset: ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} - -; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}{{$}} +; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -728,11 +864,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} - ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -742,17 +879,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32: -; GCN: 
buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_xchg_i32: +; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_ret: -; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_xchg_i32_ret: +; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst @@ -760,7 +900,7 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64: +; GCN-LABEL: {{^}}atomic_xchg_i32_addr64: ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { @@ -770,10 +910,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: 
%ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -782,8 +924,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset: -; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset: +; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -791,9 +935,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: -; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword v[[RET]] +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: +; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: buffer_store_dword v[[RET]] + +; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}} + define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -803,7 +950,7 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: ; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} @@ -815,10 +962,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset: ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dword v[[RET]] +; SIVI: buffer_store_dword v[[RET]] + +; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -829,17 +978,21 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32: -; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_cmpxchg_i32: +; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} + +; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) { entry: %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret: -; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword v[[RET]] +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret: +; SIVI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword v[[RET]] + +; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { entry: %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst @@ -848,9 +1001,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64: +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64: ; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} +; GFX9: global_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -858,10 +1012,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64: ; SI: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dword v[[RET]] +; SIVI: buffer_store_dword v[[RET]] + +; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -871,8 +1027,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_offset: -; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN-LABEL: {{^}}atomic_xor_i32_offset: +; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} + +; GFX9: global_atomic_xor v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -880,9 +1038,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset: +; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SIVI: 
buffer_store_dword [[RET]] + +; GFX9: global_atomic_xor v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -891,9 +1051,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16{{$}} define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -902,10 +1063,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -915,17 +1078,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32: -; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_xor_i32: +; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { entry: %val = atomicrmw volatile xor 
i32 addrspace(1)* %out, i32 %in seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_ret: -; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dword [[RET]] +; GCN-LABEL: {{^}}atomic_xor_i32_ret: +; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst @@ -933,9 +1099,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_addr64: +; GCN-LABEL: {{^}}atomic_xor_i32_addr64: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} +; GFX9: global_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -943,10 +1110,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64: +; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -955,10 +1124,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i32_offset: +; GCN-LABEL: {{^}}atomic_load_i32_offset: ; SI: buffer_load_dword [[RET:v[0-9]+]], off, 
s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}} define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4 @@ -967,10 +1138,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i32: +; GCN-LABEL: {{^}}atomic_load_i32: ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { entry: %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4 @@ -978,10 +1151,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset: ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dword [[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}} define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index @@ -991,10 +1166,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i32_addr64: +; GCN-LABEL: {{^}}atomic_load_i32_addr64: ; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dword 
[[RET]] +; SIVI: buffer_store_dword [[RET]] + +; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index @@ -1003,9 +1180,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_store_i32_offset: +; GCN-LABEL: {{^}}atomic_store_i32_offset: ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 @@ -1013,18 +1191,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_store_i32: +; GCN-LABEL: {{^}}atomic_store_i32: ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}} ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) { entry: store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 ret void } -; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset: +; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset: ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16 glc{{$}} define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index @@ -1033,9 +1213,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_store_i32_addr64: +; GCN-LABEL: {{^}}atomic_store_i32_addr64: ; SI: 
buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} +; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index Index: test/CodeGen/AMDGPU/global_atomics_i64.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics_i64.ll +++ test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -1,8 +1,11 @@ -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s ; GCN-LABEL: {{^}}atomic_add_i64_offset: -; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}} define amdgpu_kernel void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -11,8 +14,10 @@ } ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset: -; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; 
CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}} define amdgpu_kernel void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -24,6 +29,7 @@ ; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset: ; CI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -35,7 +41,9 @@ ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset: ; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -46,7 +54,8 @@ } ; GCN-LABEL: {{^}}atomic_add_i64: -; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} 
define amdgpu_kernel void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst @@ -54,8 +63,10 @@ } ; GCN-LABEL: {{^}}atomic_add_i64_ret: -; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst @@ -76,7 +87,9 @@ ; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64: ; CI: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -86,7 +99,8 @@ } ; GCN-LABEL: {{^}}atomic_and_i64_offset: -; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -95,8 +109,10 @@ } ; GCN-LABEL: 
{{^}}atomic_and_i64_ret_offset: -; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -108,6 +124,7 @@ ; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset: ; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -119,7 +136,9 @@ ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset: ; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -130,7 +149,8 @@ } ; GCN-LABEL: {{^}}atomic_and_i64: -; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, 
s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst @@ -138,8 +158,10 @@ } ; GCN-LABEL: {{^}}atomic_and_i64_ret: -; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst @@ -150,6 +172,7 @@ ; GCN-LABEL: {{^}}atomic_and_i64_addr64: ; CI: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_and_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -160,7 +183,9 @@ ; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64: ; CI: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void 
@atomic_and_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -170,7 +195,8 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64_offset: -; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -179,8 +205,10 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset: -; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -192,6 +220,7 @@ ; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset: ; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -203,7 +232,9 @@ ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset: ; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], 
v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -214,7 +245,8 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64: -; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst @@ -222,8 +254,10 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64_ret: -; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst @@ -234,6 +268,7 @@ ; GCN-LABEL: {{^}}atomic_sub_i64_addr64: ; CI: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], 
v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_sub_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -244,7 +279,9 @@ ; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64: ; CI: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -254,7 +291,8 @@ } ; GCN-LABEL: {{^}}atomic_max_i64_offset: -; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -263,8 +301,10 @@ } ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset: -; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = 
getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -276,6 +316,7 @@ ; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset: ; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -287,7 +328,9 @@ ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset: ; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -298,7 +341,8 @@ } ; GCN-LABEL: {{^}}atomic_max_i64: -; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst @@ -306,8 +350,10 @@ } ; GCN-LABEL: {{^}}atomic_max_i64_ret: -; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_smax_x2 
[[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst @@ -318,6 +364,7 @@ ; GCN-LABEL: {{^}}atomic_max_i64_addr64: ; CI: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_max_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -328,7 +375,9 @@ ; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64: ; CI: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -338,7 +387,8 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64_offset: -; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_umax_i64_offset(i64 
addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -347,8 +397,10 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset: -; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -360,6 +412,7 @@ ; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset: ; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -371,7 +424,9 @@ ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset: ; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, 
i64 %index @@ -382,7 +437,8 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64: -; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst @@ -390,8 +446,10 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64_ret: -; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst @@ -402,6 +460,7 @@ ; GCN-LABEL: {{^}}atomic_umax_i64_addr64: ; CI: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_umax_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -412,7 +471,9 @@ ; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64: ; CI: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: 
global_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -422,7 +483,8 @@ } ; GCN-LABEL: {{^}}atomic_min_i64_offset: -; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -431,8 +493,10 @@ } ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset: -; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -444,6 +508,7 @@ ; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset: ; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index 
@@ -455,7 +520,9 @@ ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset: ; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -466,7 +533,8 @@ } ; GCN-LABEL: {{^}}atomic_min_i64: -; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst @@ -474,8 +542,10 @@ } ; GCN-LABEL: {{^}}atomic_min_i64_ret: -; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst @@ -486,6 +556,7 @@ ; GCN-LABEL: {{^}}atomic_min_i64_addr64: ; CI: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: 
flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_min_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -496,7 +567,9 @@ ; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64: ; CI: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -506,7 +579,9 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64_offset: -; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -515,8 +590,10 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset: -; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define 
amdgpu_kernel void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -528,6 +605,7 @@ ; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset: ; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -539,7 +617,9 @@ ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset: ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -550,7 +630,8 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64: -; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst @@ -558,8 +639,10 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64_ret: -; CI: buffer_atomic_umin_x2 
[[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst @@ -570,6 +653,7 @@ ; GCN-LABEL: {{^}}atomic_umin_i64_addr64: ; CI: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_umin_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -580,7 +664,9 @@ ; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64: ; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -590,7 +676,8 @@ } ; GCN-LABEL: {{^}}atomic_or_i64_offset: -; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: 
global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -599,8 +686,10 @@ } ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset: -; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -612,6 +701,7 @@ ; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset: ; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -623,7 +713,9 @@ ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset: ; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64 addrspace(1)* %out, 
i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -634,7 +726,8 @@ } ; GCN-LABEL: {{^}}atomic_or_i64: -; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst @@ -642,8 +735,10 @@ } ; GCN-LABEL: {{^}}atomic_or_i64_ret: -; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst @@ -654,6 +749,7 @@ ; GCN-LABEL: {{^}}atomic_or_i64_addr64: ; CI: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_or_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -664,7 +760,9 @@ ; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64: ; CI: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: 
buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -674,7 +772,9 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64_offset: -; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} + +; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -683,8 +783,10 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: -; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -696,6 +798,7 @@ ; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset: ; CI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}} +; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 
%in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -707,7 +810,9 @@ ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset: ; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -718,7 +823,8 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64: -; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst @@ -726,8 +832,10 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret: -; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst @@ -738,6 +846,7 @@ ; GCN-LABEL: {{^}}atomic_xchg_i64_addr64: ; CI: buffer_atomic_swap_x2 
v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_xchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -748,7 +857,9 @@ ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64: ; CI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -758,7 +869,8 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64_offset: -; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -767,8 +879,10 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset: -; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_xor_x2 
[[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -780,6 +894,7 @@ ; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset: ; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} ; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -791,7 +906,9 @@ ; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset: ; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -802,7 +919,8 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64: -; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; CIVI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst @@ -810,8 +928,10 @@ 
} ; GCN-LABEL: {{^}}atomic_xor_i64_ret: -; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst @@ -822,6 +942,7 @@ ; GCN-LABEL: {{^}}atomic_xor_i64_addr64: ; CI: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} +; GFX9: global_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_xor_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -832,7 +953,9 @@ ; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64: ; CI: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}} define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -842,15 +965,9 @@ } - - - - - - - -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_offset: -; GCN: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN-LABEL: 
{{^}}atomic_cmpxchg_i64_offset: +; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64 addrspace(1)* %out, i64 %in, i64 %old) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -858,9 +975,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_soffset: -; GCN: s_mov_b32 [[SREG:s[0-9]+]], 0x11940 -; GCN: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset: +; CIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x11940 +; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}} + +; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}} define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64 addrspace(1)* %out, i64 %in, i64 %old) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 9000 @@ -868,9 +987,11 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset: -; GCN: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]: +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset: +; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: + +; GFX9: global_atomic_cmpswap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -880,10 +1001,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset: +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset: ; CI: 
buffer_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}} - ; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}] offset:32{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -892,10 +1013,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset: +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset: ; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]: +; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: + +; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -906,17 +1029,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64: -; GCN: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN-LABEL: {{^}}atomic_cmpxchg_i64: +; CIVI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64(i64 addrspace(1)* %out, i64 %in, i64 %old) { entry: %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret: -; GCN: buffer_atomic_cmpswap_x2 
v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc -; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]: +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret: +; CIVI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: + +; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %old) { entry: %val = cmpxchg volatile i64 addrspace(1)* %out, i64 %old, i64 %in seq_cst seq_cst @@ -925,9 +1051,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_addr64: +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64: ; CI: buffer_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} ; VI: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} +; GFX9: global_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64 addrspace(1)* %out, i64 %in, i64 %index, i64 %old) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -935,10 +1062,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64: +; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64: ; CI: buffer_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dwordx2 v{{\[}}[[RET]]: +; CIVI: buffer_store_dwordx2 v{{\[}}[[RET]]: + +; GFX9: global_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in, i64 %index, i64 %old) { entry: %ptr = getelementptr 
i64, i64 addrspace(1)* %out, i64 %index @@ -948,10 +1077,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i64_offset: +; GCN-LABEL: {{^}}atomic_load_i64_offset: ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}} define amdgpu_kernel void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) { entry: %gep = getelementptr i64, i64 addrspace(1)* %in, i64 4 @@ -960,10 +1091,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i64: +; GCN-LABEL: {{^}}atomic_load_i64: ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) { entry: %val = load atomic i64, i64 addrspace(1)* %in seq_cst, align 8 @@ -971,10 +1104,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i64_addr64_offset: +; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset: ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}} define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 
%index @@ -984,10 +1119,12 @@ ret void } -; FUNC-LABEL: {{^}}atomic_load_i64_addr64: +; GCN-LABEL: {{^}}atomic_load_i64_addr64: ; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} -; GCN: buffer_store_dwordx2 [[RET]] +; CIVI: buffer_store_dwordx2 [[RET]] + +; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_load_i64_addr64(i64 addrspace(1)* %in, i64 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %in, i64 %index @@ -996,9 +1133,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_store_i64_offset: +; GCN-LABEL: {{^}}atomic_store_i64_offset: ; CI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GFX9: global_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] offset:32 glc{{$}} define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -1006,18 +1144,20 @@ ret void } -; FUNC-LABEL: {{^}}atomic_store_i64: +; GCN-LABEL: {{^}}atomic_store_i64: ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc +; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc define amdgpu_kernel void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) { entry: store atomic i64 %in, i64 addrspace(1)* %out seq_cst, align 8 ret void } -; FUNC-LABEL: {{^}}atomic_store_i64_addr64_offset: +; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset: ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}} ; VI: 
flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}} +; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] offset:32 glc{{$}} define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index @@ -1026,9 +1166,10 @@ ret void } -; FUNC-LABEL: {{^}}atomic_store_i64_addr64: +; GCN-LABEL: {{^}}atomic_store_i64_addr64: ; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}} +; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}] glc{{$}} define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64 addrspace(1)* %out, i64 %index) { entry: %ptr = getelementptr i64, i64 addrspace(1)* %out, i64 %index Index: test/CodeGen/AMDGPU/indirect-addressing-si.ll =================================================================== --- test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -321,7 +321,7 @@ ; FIXME: Why is vector copied in between? 
-; GCN-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]] ; GCN-DAG: s_mov_b32 [[S_ELT1:s[0-9]+]], 9 ; GCN-DAG: s_mov_b32 [[S_ELT0:s[0-9]+]], 7 ; GCN-DAG: v_mov_b32_e32 [[VEC_ELT0:v[0-9]+]], [[S_ELT0]] @@ -400,7 +400,7 @@ ; GCN-LABEL: {{^}}insert_vgpr_offset_multiple_in_block: ; GCN-DAG: s_load_dwordx4 s{{\[}}[[S_ELT0:[0-9]+]]:[[S_ELT3:[0-9]+]]{{\]}} -; GCN-DAG: {{buffer|flat}}_load_dword [[IDX0:v[0-9]+]] +; GCN-DAG: {{buffer|flat|global}}_load_dword [[IDX0:v[0-9]+]] ; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62 ; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT3:[0-9]+]], s[[S_ELT3]] Index: test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -189,14 +189,14 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_0: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e7, [[ELT1]] ; GFX9-DAG: s_movk_i32 [[ELT0:s[0-9]+]], 0x3e7{{$}} ; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff{{$}} ; GFX9: v_bfi_b32 [[RES:v[0-9]+]], [[MASK]], [[ELT0]], [[VEC]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_0(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -209,7 +209,7 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_0_reghi: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GCN-DAG: s_load_dword [[ELT0:s[0-9]+]] ; CIVI-DAG: s_lshr_b32 [[ELT0_SHIFT:s[0-9]+]], [[ELT0]], 16 @@ -220,7 +220,7 @@ ; GFX9-DAG: v_lshrrev_b32_e64 [[ELT0_SHIFT:v[0-9]+]], 16, [[ELT0]] ; GFX9: 
v_and_or_b32 [[RES:v[0-9]+]], [[VEC]], [[MASK]], [[ELT0_SHIFT]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_0_reghi(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, i32 %elt.arg) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -235,7 +235,7 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_0_inlineimm: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 53, [[ELT1]] @@ -243,7 +243,7 @@ ; GFX9-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff{{$}} ; GFX9: v_bfi_b32 [[RES:v[0-9]+]], [[MASK]], 53, [[VEC]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_0_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -258,14 +258,14 @@ ; FIXME: fold lshl_or c0, c1, v0 -> or (c0 << c1), v0 ; GCN-LABEL: {{^}}v_insertelement_v2i16_1: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x3e70000, [[VEC]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3e7 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_1(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -278,11 +278,11 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_1_inlineimm: -; GCN: 
flat_load_dword [[VEC:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0xfff10000, [[ELT0]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], -15, 16, [[ELT0]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2i16_1_inlineimm(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -295,7 +295,7 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_0: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x4500, [[ELT1]] @@ -304,7 +304,7 @@ ; GFX9-DAG: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[ELT1]], 16, [[ELT0]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_0(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -317,14 +317,14 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_0_inlineimm: -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CIVI: v_and_b32_e32 [[ELT1:v[0-9]+]], 0xffff0000, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 53, [[ELT1]] ; GFX9: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[ELT1]], 16, 53 -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_0_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 
@llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -337,14 +337,14 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x45000000, [[VEC]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x4500 ; GFX9-DAG: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], [[K]], 16, [[ELT0]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_1(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -357,11 +357,11 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_1_inlineimm: -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GCN: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VEC]] ; CIVI: v_or_b32_e32 [[RES:v[0-9]+]], 0x230000, [[ELT0]] ; GFX9: v_lshl_or_b32 [[RES:v[0-9]+]], 35, 16, [[ELT0]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RES]] define amdgpu_kernel void @v_insertelement_v2f16_1_inlineimm(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -382,7 +382,7 @@ ; GCN-DAG: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 16 ; GCN-DAG: s_lshl_b32 [[MASK:s[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VVEC]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @s_insertelement_v2i16_dynamic(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(2)* %vec.ptr, i32 addrspace(2)* %idx.ptr) #0 { %idx = load volatile i32, i32 addrspace(2)* %idx.ptr %vec = load <2 
x i16>, <2 x i16> addrspace(2)* %vec.ptr @@ -392,13 +392,13 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_sgpr: -; GCN-DAG: flat_load_dword [[VEC:v[0-9]+]] +; GCN-DAG: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GCN-DAG: s_load_dword [[IDX:s[0-9]+]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 ; GCN-DAG: s_lshl_b32 [[SCALED_IDX:s[0-9]+]], [[IDX]], 16 ; GCN-DAG: s_lshl_b32 [[MASK:s[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, i32 %idx) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -411,8 +411,8 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr: -; GCN: flat_load_dword [[IDX:v[0-9]+]] -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[IDX:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7 ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} @@ -423,7 +423,7 @@ ; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, i32 addrspace(1)* %idx.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 @@ -438,8 +438,8 @@ } ; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr: -; GCN: flat_load_dword [[IDX:v[0-9]+]] -; GCN: flat_load_dword [[VEC:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[IDX:v[0-9]+]] +; GCN: {{flat|global}}_load_dword [[VEC:v[0-9]+]] ; GCN-DAG: v_mov_b32_e32 
[[K:v[0-9]+]], 0x1234 ; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}} @@ -450,7 +450,7 @@ ; CI-DAG: v_lshl_b32_e32 [[MASK:v[0-9]+]], 0xffff, [[SCALED_IDX]] ; GCN: v_bfi_b32 [[RESULT:v[0-9]+]], [[MASK]], [[K]], [[VEC]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] +; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] define amdgpu_kernel void @v_insertelement_v2f16_dynamic_vgpr(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in, i32 addrspace(1)* %idx.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() #1 %tid.ext = sext i32 %tid to i64 Index: test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -36,8 +36,8 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, [[A]], [[B]] ; GFX89: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], [[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { @@ -54,7 +54,7 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_reg_imm: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], 1.0 define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -68,7 +68,7 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_imm_reg: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; SI: v_cvt_pkrtz_f16_f32_e32 v{{[0-9]+}}, 1.0, [[A]] ; GFX89: v_cvt_pkrtz_f16_f32_e64 
v{{[0-9]+}}, 1.0, [[A]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { @@ -83,8 +83,8 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], [[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -101,8 +101,8 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_hi: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, [[A]], -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -119,8 +119,8 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_lo_hi: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -[[A]], -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -138,8 +138,8 @@ } ; GCN-LABEL: {{^}}v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi: -; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] +; GCN: 
{{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pkrtz_f16_f32_e64 v{{[0-9]+}}, -|[[A]]|, -[[B]] define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() Index: test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -4,7 +4,7 @@ ; GCN-LABEL: {{^}}test_barrier: ; GFX8: buffer_store_dword ; GFX8: s_waitcnt -; GFX9: flat_store_dword +; GFX9: global_store_dword ; GFX9-NOT: s_waitcnt ; GCN: s_barrier define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out) #0 { Index: test/CodeGen/AMDGPU/lshr.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/lshr.v2i16.ll +++ test/CodeGen/AMDGPU/lshr.v2i16.ll @@ -20,8 +20,8 @@ } ; GCN-LABEL: {{^}}v_lshr_v2i16: -; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] ; VI: v_lshrrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -52,7 +52,7 @@ ; GCN-LABEL: {{^}}lshr_v_s_v2i16: ; GFX9: s_load_dword [[RHS:s[0-9]+]] -; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] +; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] ; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] define amdgpu_kernel void @lshr_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -67,7 +67,7 @@ ; GCN-LABEL: {{^}}lshr_s_v_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] -; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; 
GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] define amdgpu_kernel void @lshr_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -81,7 +81,7 @@ } ; GCN-LABEL: {{^}}lshr_imm_v_v2i16: -; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], [[RHS]], 8 define amdgpu_kernel void @lshr_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -95,7 +95,7 @@ } ; GCN-LABEL: {{^}}lshr_v_imm_v2i16: -; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] ; GFX9: v_pk_lshrrev_b16 [[RESULT:v[0-9]+]], 8, [[LHS]] define amdgpu_kernel void @lshr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -109,11 +109,11 @@ } ; GCN-LABEL: {{^}}v_lshr_v4i16: -; GCN: {{buffer|flat}}_load_dwordx2 -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GCN: {{buffer|flat}}_store_dwordx2 +; GCN: {{buffer|flat|global}}_store_dwordx2 define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -128,10 +128,10 @@ } ; GCN-LABEL: {{^}}lshr_v_imm_v4i16: -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_lshrrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}} -; GCN: {{buffer|flat}}_store_dwordx2 +; GCN: {{buffer|flat|global}}_store_dwordx2 define amdgpu_kernel void @lshr_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> 
addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 Index: test/CodeGen/AMDGPU/min.ll =================================================================== --- test/CodeGen/AMDGPU/min.ll +++ test/CodeGen/AMDGPU/min.ll @@ -348,8 +348,8 @@ ; SI: buffer_load_ubyte ; SI: v_min_u32_e32 -; GFX89: flat_load_ubyte -; GFX89: flat_load_ubyte +; GFX89: {{flat|global}}_load_ubyte +; GFX89: {{flat|global}}_load_ubyte ; GFX89: v_min_u16_e32 ; EG: MIN_UINT Index: test/CodeGen/AMDGPU/pack.v2f16.ll =================================================================== --- test/CodeGen/AMDGPU/pack.v2f16.ll +++ test/CodeGen/AMDGPU/pack.v2f16.ll @@ -56,8 +56,8 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16: -; GFX9: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9: global_load_dword [[VAL0:v[0-9]+]] +; GFX9: global_load_dword [[VAL1:v[0-9]+]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] @@ -81,8 +81,8 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_user: -; GFX9: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9: global_load_dword [[VAL0:v[0-9]+]] +; GFX9: global_load_dword [[VAL1:v[0-9]+]] ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] @@ -108,7 +108,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_imm_lo: -; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]] ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234{{$}} ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] @@ -128,7 +128,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_lo: -; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]] ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x4400{{$}} ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[K]] @@ -149,7 +149,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_imm_hi: -; GFX9-DAG: 
flat_load_dword [[VAL0:v[0-9]+]] +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x1234 ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] @@ -171,7 +171,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_inline_f16imm_hi: -; GFX9-DAG: flat_load_dword [[VAL:v[0-9]+]] +; GFX9-DAG: global_load_dword [[VAL:v[0-9]+]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x3c00 ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] @@ -193,7 +193,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2f16_inline_imm_hi: -; GFX9: flat_load_dword [[VAL:v[0-9]+]] +; GFX9: global_load_dword [[VAL:v[0-9]+]] ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 64, 16, [[MASKED]] Index: test/CodeGen/AMDGPU/pack.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/pack.v2i16.ll +++ test/CodeGen/AMDGPU/pack.v2i16.ll @@ -52,8 +52,8 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16: -; GFX9: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9: global_load_dword [[VAL0:v[0-9]+]] +; GFX9: global_load_dword [[VAL1:v[0-9]+]] ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] @@ -75,8 +75,8 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_user: -; GFX9: flat_load_dword [[VAL0:v[0-9]+]] -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9: global_load_dword [[VAL0:v[0-9]+]] +; GFX9: global_load_dword [[VAL1:v[0-9]+]] ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] @@ -100,7 +100,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_imm_lo: -; GFX9-DAG: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9-DAG: global_load_dword [[VAL1:v[0-9]+]] ; GFX9-DENORM-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7b{{$}} @@ -121,7 +121,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_lo: -; GFX9: flat_load_dword [[VAL1:v[0-9]+]] +; GFX9: 
global_load_dword [[VAL1:v[0-9]+]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, 64 ; GFX9: ; use [[PACKED]] @@ -139,7 +139,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_imm_hi: -; GFX9-DAG: flat_load_dword [[VAL0:v[0-9]+]] +; GFX9-DAG: global_load_dword [[VAL0:v[0-9]+]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x7b{{$}} ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[K]], 16, [[VAL0]] @@ -159,7 +159,7 @@ } ; GCN-LABEL: {{^}}v_pack_v2i16_inline_imm_hi: -; GFX9: flat_load_dword [[VAL:v[0-9]+]] +; GFX9: global_load_dword [[VAL:v[0-9]+]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], 7, 16, [[VAL0]] ; GFX9: ; use [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(i32 addrspace(1)* %in0) #0 { Index: test/CodeGen/AMDGPU/sext-in-reg.ll =================================================================== --- test/CodeGen/AMDGPU/sext-in-reg.ll +++ test/CodeGen/AMDGPU/sext-in-reg.ll @@ -152,14 +152,14 @@ ; SI: buffer_load_dwordx2 ; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} -; GFX89: flat_load_dwordx2 +; GFX89: {{flat|global}}_load_dwordx2 ; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} ; GCN: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} -; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} define amdgpu_kernel void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid @@ -179,14 +179,14 @@ ; SI: buffer_load_dwordx2 ; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} -; GFX89: flat_load_dwordx2 +; GFX89: {{flat|global}}_load_dwordx2 ; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} ; GCN: v_bfe_i32 v[[LO:[0-9]+]], 
v[[VAL_LO]], 0, 8 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} -; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} define amdgpu_kernel void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid @@ -206,14 +206,14 @@ ; SI: buffer_load_dwordx2 ; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} -; GFX89: flat_load_dwordx2 +; GFX89: {{flat|global}}_load_dwordx2 ; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} ; GCN: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} -; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} +; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}} define amdgpu_kernel void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid @@ -233,11 +233,11 @@ ; SI: buffer_load_dwordx2 ; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, -; GFX89: flat_load_dwordx2 +; GFX89: {{flat|global}}_load_dwordx2 ; GFX89: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, ; GCN: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] -; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}} +; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[SHR]]{{\]}} define amdgpu_kernel void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %a.gep = 
getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid @@ -463,7 +463,7 @@ ; SI: buffer_load_dwordx2 ; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} -; GFX89: flat_load_dwordx2 +; GFX89: {{flat|global}}_load_dwordx2 ; GFX89: v_lshlrev_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} ; GCN-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1 @@ -471,7 +471,7 @@ ; GCN-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]] ; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]] ; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} -; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} define amdgpu_kernel void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid @@ -493,7 +493,7 @@ ; SI: buffer_load_dwordx2 ; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, -; GFX89: flat_load_dwordx2 +; GFX89: {{flat|global}}_load_dwordx2 ; GFX89: v_lshlrev_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, ; GCN-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] @@ -501,7 +501,7 @@ ; GCN-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]] ; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} -; GFX89: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +; GFX89: {{flat|global}}_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} define amdgpu_kernel void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) #0 { %tid = call i32 @llvm.r600.read.tidig.x() %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid @@ -557,7 +557,7 @@ } ; FUNC-LABEL: 
{{^}}v_sext_in_reg_i1_i16: -; GCN: {{buffer|flat}}_load_ushort [[VAL:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_ushort [[VAL:v[0-9]+]] ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[VAL]], 0, 1{{$}} ; GCN: ds_write_b16 v{{[0-9]+}}, [[BFE]] @@ -574,8 +574,8 @@ } ; FUNC-LABEL: {{^}}v_sext_in_reg_i1_i16_nonload: -; GCN: {{buffer|flat}}_load_ushort [[VAL0:v[0-9]+]] -; GCN: {{buffer|flat}}_load_ushort [[VAL1:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_ushort [[VAL0:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_ushort [[VAL1:v[0-9]+]] ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]] ; GFX89: v_lshlrev_b16_e32 [[REG:v[0-9]+]], [[VAL1]], [[VAL0]] Index: test/CodeGen/AMDGPU/shl.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/shl.v2i16.ll +++ test/CodeGen/AMDGPU/shl.v2i16.ll @@ -24,8 +24,8 @@ } ; GCN-LABEL: {{^}}v_shl_v2i16: -; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] -; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] ; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -55,7 +55,7 @@ ; GCN-LABEL: {{^}}shl_v_s_v2i16: ; GFX9: s_load_dword [[RHS:s[0-9]+]] -; GFX9: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] +; GFX9: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] define amdgpu_kernel void @shl_v_s_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in, <2 x i16> %sgpr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -70,7 +70,7 @@ ; GCN-LABEL: {{^}}shl_s_v_v2i16: ; GFX9: s_load_dword [[LHS:s[0-9]+]] -; GFX9: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GFX9: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]] define amdgpu_kernel void @shl_s_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> 
addrspace(1)* %in, <2 x i16> %sgpr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -84,7 +84,7 @@ } ; GCN-LABEL: {{^}}shl_imm_v_v2i16: -; GCN: {{buffer|flat}}_load_dword [[RHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], [[RHS]], 8 define amdgpu_kernel void @shl_imm_v_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -98,7 +98,7 @@ } ; GCN-LABEL: {{^}}shl_v_imm_v2i16: -; GCN: {{buffer|flat}}_load_dword [[LHS:v[0-9]+]] +; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]] ; GFX9: v_pk_lshlrev_b16 [[RESULT:v[0-9]+]], 8, [[LHS]] define amdgpu_kernel void @shl_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -112,11 +112,11 @@ } ; GCN-LABEL: {{^}}v_shl_v4i16: -; GCN: {{buffer|flat}}_load_dwordx2 -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -; GCN: {{buffer|flat}}_store_dwordx2 +; GCN: {{buffer|flat|global}}_store_dwordx2 define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -131,10 +131,10 @@ } ; GCN-LABEL: {{^}}shl_v_imm_v4i16: -; GCN: {{buffer|flat}}_load_dwordx2 +; GCN: {{buffer|flat|global}}_load_dwordx2 ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}} ; GFX9: v_pk_lshlrev_b16 v{{[0-9]+}}, 8, v{{[0-9]+}} -; GCN: {{buffer|flat}}_store_dwordx2 +; GCN: {{buffer|flat|global}}_store_dwordx2 define amdgpu_kernel void @shl_v_imm_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 Index: test/CodeGen/AMDGPU/sminmax.v2i16.ll 
=================================================================== --- test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -35,7 +35,7 @@ } ; GCN-LABEL: {{^}}v_abs_v2i16: -; GFX9: flat_load_dword [[VAL:v[0-9]+]] +; GFX9: global_load_dword [[VAL:v[0-9]+]] ; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]] ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 Index: test/CodeGen/AMDGPU/store-global.ll =================================================================== --- test/CodeGen/AMDGPU/store-global.ll +++ test/CodeGen/AMDGPU/store-global.ll @@ -1,5 +1,6 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s @@ -10,7 +11,8 @@ ; CM: MEM_RAT MSKOR ; CM-NOT: MEM_RAT MSKOR -; GCN: buffer_store_byte +; SIVI: buffer_store_byte +; GFX9: global_store_byte define amdgpu_kernel void @store_i1(i1 addrspace(1)* %out) { entry: store i1 true, i1 addrspace(1)* %out @@ -40,8 +42,8 @@ ; EG: MOV T[[RW_GPR]].Y, 0.0 ; EG: MOV * T[[RW_GPR]].Z, 0.0 -; GCN: buffer_store_byte - +; SIVI: buffer_store_byte +; GFX9: global_store_byte define amdgpu_kernel void @store_i8(i8 addrspace(1)* %out, i8 %in) { 
entry: store i8 %in, i8 addrspace(1)* %out @@ -74,7 +76,8 @@ ; EG: MOV T[[RW_GPR]].Y, 0.0 ; EG: MOV * T[[RW_GPR]].Z, 0.0 -; GCN: buffer_store_short +; SIVI: buffer_store_short +; GFX9: global_store_short define amdgpu_kernel void @store_i16(i16 addrspace(1)* %out, i16 %in) { entry: store i16 %in, i16 addrspace(1)* %out @@ -83,8 +86,11 @@ ; FUNC-LABEL: {{^}}store_i24: ; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16 -; GCN-DAG: buffer_store_byte -; GCN-DAG: buffer_store_short +; SIVI-DAG: buffer_store_byte +; SIVI-DAG: buffer_store_short + +; GFX9-DAG: global_store_byte +; GFX9-DAG: global_store_short ; EG: MEM_RAT MSKOR ; EG: MEM_RAT MSKOR @@ -97,7 +103,8 @@ ; FUNC-LABEL: {{^}}store_i25: ; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}} ; GCN: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]] -; GCN: buffer_store_dword [[VAND]] +; SIVI: buffer_store_dword [[VAND]] +; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VAND]] ; EG: MEM_RAT_CACHELESS STORE_RAW ; EG-NOT: MEM_RAT @@ -118,7 +125,8 @@ ; CM: MEM_RAT MSKOR ; CM-NOT: MEM_RAT MSKOR -; GCN: buffer_store_short +; SIVI: buffer_store_short +; GFX9: global_store_short define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) { entry: %0 = trunc <2 x i32> %in to <2 x i8> @@ -149,7 +157,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD -; GCN: buffer_store_dword +; SIVI: buffer_store_dword +; GFX9: global_store_dword define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) { entry: %0 = trunc <2 x i32> %in to <2 x i16> @@ -168,8 +177,11 @@ ; CM-NOT: MEM_RAT MSKOR ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD -; SI: buffer_store_short -; SI: buffer_store_short +; SIVI: buffer_store_short +; SIVI: buffer_store_short + +; GFX9: global_store_short +; GFX9: global_store_short define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(1)* %out, <2 x i32> %in) { entry: %0 = trunc <2 x i32> %in to <2 x i16> @@ -182,7 +194,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD -; GCN: 
buffer_store_dword +; SIVI: buffer_store_dword +; GFX9: global_store_dword define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) { entry: %0 = trunc <4 x i32> %in to <4 x i8> @@ -244,7 +257,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}} -; GCN: buffer_store_dword +; SIVI: buffer_store_dword +; GFX9: global_store_dword define amdgpu_kernel void @store_f32(float addrspace(1)* %out, float %in) { store float %in, float addrspace(1)* %out @@ -256,7 +270,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}} -; GCN: buffer_store_dwordx2 +; SIVI: buffer_store_dwordx2 +; GFX9: global_store_dwordx2 define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) { entry: %0 = trunc <4 x i32> %in to <4 x i16> @@ -270,7 +285,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD -; GCN: buffer_store_dwordx2 +; SIVI: buffer_store_dwordx2 +; GFX9: global_store_dwordx2 define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { entry: @@ -281,8 +297,11 @@ } ; FUNC-LABEL: {{^}}store_v3i32: -; GCN-DAG: buffer_store_dwordx2 -; GCN-DAG: buffer_store_dword v +; SIVI-DAG: buffer_store_dwordx2 +; SIVI-DAG: buffer_store_dword v + +; GFX9-DAG: global_store_dwordx2 +; GFX9-DAG: global_store_dword v ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XY}}, {{T[0-9]+\.[XYZW]}}, @@ -298,7 +317,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD -; GCN: buffer_store_dwordx4 +; SIVI: buffer_store_dwordx4 +; GFX9: global_store_dwordx4 define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out @@ -312,7 +332,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD -; SI: buffer_store_dwordx4 +; SIVI: buffer_store_dwordx4 +; GFX9: global_store_dwordx4 define amdgpu_kernel void @store_v4i32_unaligned(<4 
x i32> addrspace(1)* %out, <4 x i32> %in) { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 @@ -327,7 +348,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD -; GCN: buffer_store_dwordx4 +; SIVI: buffer_store_dwordx4 +; GFX9: global_store_dwordx4 define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { %1 = load <4 x float>, <4 x float> addrspace(1) * %in store <4 x float> %1, <4 x float> addrspace(1)* %out @@ -339,7 +361,8 @@ ; CM: MEM_RAT MSKOR -; GCN: buffer_store_byte +; SIVI: buffer_store_byte +; GFX9: global_store_byte define amdgpu_kernel void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) { entry: %0 = trunc i64 %in to i8 @@ -349,7 +372,8 @@ ; FUNC-LABEL: {{^}}store_i64_i16: ; EG: MEM_RAT MSKOR -; GCN: buffer_store_short +; SIVI: buffer_store_short +; GFX9: global_store_short define amdgpu_kernel void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) { entry: %0 = trunc i64 %in to i16 @@ -368,7 +392,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD -; GCN: buffer_store_dwordx2 +; SIVI: buffer_store_dwordx2 +; GFX9: global_store_dwordx2 define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { entry: %0 = load i32, i32 addrspace(2)* %mem, align 4 @@ -387,7 +412,8 @@ ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X -; GCN: buffer_store_dwordx4 +; SIVI: buffer_store_dwordx4 +; GFX9: global_store_dwordx4 define amdgpu_kernel void @i128-const-store(i32 addrspace(1)* %out) { entry: store i32 1, i32 addrspace(1)* %out, align 4 Index: test/CodeGen/AMDGPU/sub.v2i16.ll =================================================================== --- test/CodeGen/AMDGPU/sub.v2i16.ll +++ test/CodeGen/AMDGPU/sub.v2i16.ll @@ -150,8 +150,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_sub_v2i16_zext_to_v2i32: -; GFX9: flat_load_dword [[A:v[0-9]+]] -; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: global_load_dword [[A:v[0-9]+]] +; GFX9: global_load_dword [[B:v[0-9]+]] ; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]] @@ -185,8 +185,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_v2i16_zext_to_v2i64: -; GFX9: flat_load_dword [[A:v[0-9]+]] -; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: global_load_dword [[A:v[0-9]+]] +; GFX9: global_load_dword [[B:v[0-9]+]] ; GFX9: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}} ; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]] @@ -220,8 +220,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). ; GCN-LABEL: {{^}}v_test_sub_v2i16_sext_to_v2i32: -; GFX9: flat_load_dword [[A:v[0-9]+]] -; GFX9: flat_load_dword [[B:v[0-9]+]] +; GFX9: global_load_dword [[A:v[0-9]+]] +; GFX9: global_load_dword [[B:v[0-9]+]] ; GFX9: v_pk_sub_i16 [[ADD:v[0-9]+]], [[A]], [[B]] ; GFX9-DAG: v_bfe_i32 v[[ELT0:[0-9]+]], [[ADD]], 0, 16 @@ -246,8 +246,8 @@ ; FIXME: Need to handle non-uniform case for function below (load without gep). 
; GCN-LABEL: {{^}}v_test_sub_v2i16_sext_to_v2i64: -; GCN: flat_load_dword -; GCN: flat_load_dword +; GCN: {{flat|global}}_load_dword +; GCN: {{flat|global}}_load_dword ; GFX9: v_pk_sub_i16 ; GFX9: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}} Index: test/MC/AMDGPU/flat-global.s =================================================================== --- test/MC/AMDGPU/flat-global.s +++ test/MC/AMDGPU/flat-global.s @@ -85,3 +85,211 @@ global_store_dword v[3:4], v1 offset:12 // GFX9: global_store_dword v[3:4], v1 offset:12 ; encoding: [0x0c,0x80,0x70,0xdc,0x03,0x01,0x00,0x00] // VI-ERR: :37: error: not a valid operand + +global_atomic_cmpswap v[3:4], v[5:6] +// GFX9: global_atomic_cmpswap v[3:4], v[5:6] ; encoding: [0x00,0x80,0x04,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_cmpswap_x2 v[3:4], v[5:8] +// GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8] ; encoding: [0x00,0x80,0x84,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_swap v[3:4], v5 +// GFX9: global_atomic_swap v[3:4], v5 ; encoding: [0x00,0x80,0x00,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_swap_x2 v[3:4], v[5:6] +// GFX9: global_atomic_swap_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0x80,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_add v[3:4], v5 +// GFX9: global_atomic_add v[3:4], v5 ; encoding: [0x00,0x80,0x08,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_sub v[3:4], v5 +// GFX9: global_atomic_sub v[3:4], v5 ; encoding: [0x00,0x80,0x0c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_smin v[3:4], v5 +// GFX9: global_atomic_smin v[3:4], v5 ; encoding: [0x00,0x80,0x10,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_umin v[3:4], v5 +// GFX9: global_atomic_umin v[3:4], v5 ; encoding: 
[0x00,0x80,0x14,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_smax v[3:4], v5 +// GFX9: global_atomic_smax v[3:4], v5 ; encoding: [0x00,0x80,0x18,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_umax v[3:4], v5 +// GFX9: global_atomic_umax v[3:4], v5 ; encoding: [0x00,0x80,0x1c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_and v[3:4], v5 +// GFX9: global_atomic_and v[3:4], v5 ; encoding: [0x00,0x80,0x20,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_or v[3:4], v5 +// GFX9: global_atomic_or v[3:4], v5 ; encoding: [0x00,0x80,0x24,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_xor v[3:4], v5 +// GFX9: global_atomic_xor v[3:4], v5 ; encoding: [0x00,0x80,0x28,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_inc v[3:4], v5 +// GFX9: global_atomic_inc v[3:4], v5 ; encoding: [0x00,0x80,0x2c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_dec v[3:4], v5 +// GFX9: global_atomic_dec v[3:4], v5 ; encoding: [0x00,0x80,0x30,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_add_x2 v[3:4], v[5:6] +// GFX9: global_atomic_add_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0x88,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_sub_x2 v[3:4], v[5:6] +// GFX9: global_atomic_sub_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0x8c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_smin_x2 v[3:4], v[5:6] +// GFX9: global_atomic_smin_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0x90,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_umin_x2 v[3:4], v[5:6] +// GFX9: global_atomic_umin_x2 v[3:4], v[5:6] ; encoding: 
[0x00,0x80,0x94,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_smax_x2 v[3:4], v[5:6] +// GFX9: global_atomic_smax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0x98,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_umax_x2 v[3:4], v[5:6] +// GFX9: global_atomic_umax_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0x9c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_and_x2 v[3:4], v[5:6] +// GFX9: global_atomic_and_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0xa0,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_or_x2 v[3:4], v[5:6] +// GFX9: global_atomic_or_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0xa4,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_xor_x2 v[3:4], v[5:6] +// GFX9: global_atomic_xor_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0xa8,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_inc_x2 v[3:4], v[5:6] +// GFX9: global_atomic_inc_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0xac,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_dec_x2 v[3:4], v[5:6] +// GFX9: global_atomic_dec_x2 v[3:4], v[5:6] ; encoding: [0x00,0x80,0xb0,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: instruction not supported on this GPU + +global_atomic_cmpswap v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_cmpswap v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x04,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :44: error: not a valid operand + +global_atomic_cmpswap_x2 v[3:4], v[5:8] offset:-16 +// GFX9: global_atomic_cmpswap_x2 v[3:4], v[5:8] offset:-16 ; encoding: [0xf0,0x9f,0x84,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :47: error: not a valid operand + +global_atomic_swap v[3:4], v5 offset:-16 +// GFX9: global_atomic_swap v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x00,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: 
:37: error: not a valid operand + +global_atomic_swap_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_swap_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x80,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :44: error: not a valid operand + +global_atomic_add v[3:4], v5 offset:-16 +// GFX9: global_atomic_add v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x08,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :36: error: not a valid operand + +global_atomic_sub v[3:4], v5 offset:-16 +// GFX9: global_atomic_sub v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x0c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :36: error: not a valid operand + +global_atomic_smin v[3:4], v5 offset:-16 +// GFX9: global_atomic_smin v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x10,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :37: error: not a valid operand + +global_atomic_umin v[3:4], v5 offset:-16 +// GFX9: global_atomic_umin v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x14,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :37: error: not a valid operand + +global_atomic_smax v[3:4], v5 offset:-16 +// GFX9: global_atomic_smax v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x18,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :37: error: not a valid operand + +global_atomic_umax v[3:4], v5 offset:-16 +// GFX9: global_atomic_umax v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x1c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :37: error: not a valid operand + +global_atomic_and v[3:4], v5 offset:-16 +// GFX9: global_atomic_and v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x20,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :36: error: not a valid operand + +global_atomic_or v[3:4], v5 offset:-16 +// GFX9: global_atomic_or v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x24,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :35: error: not a valid operand + +global_atomic_xor v[3:4], v5 offset:-16 +// GFX9: global_atomic_xor v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x28,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :36: error: not a valid operand + +global_atomic_inc v[3:4], v5 
offset:-16 +// GFX9: global_atomic_inc v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x2c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :36: error: not a valid operand + +global_atomic_dec v[3:4], v5 offset:-16 +// GFX9: global_atomic_dec v[3:4], v5 offset:-16 ; encoding: [0xf0,0x9f,0x30,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :36: error: not a valid operand + +global_atomic_add_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_add_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x88,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :43: error: not a valid operand + +global_atomic_sub_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_sub_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x8c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :43: error: not a valid operand + +global_atomic_smin_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_smin_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x90,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :44: error: not a valid operand + +global_atomic_umin_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_umin_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x94,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :44: error: not a valid operand + +global_atomic_smax_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_smax_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x98,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :44: error: not a valid operand + +global_atomic_umax_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_umax_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0x9c,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :44: error: not a valid operand + +global_atomic_and_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_and_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0xa0,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :43: error: not a valid operand + +global_atomic_or_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_or_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0xa4,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :42: error: not a valid operand + 
+global_atomic_xor_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_xor_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0xa8,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :43: error: not a valid operand + +global_atomic_inc_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_inc_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0xac,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :43: error: not a valid operand + +global_atomic_dec_x2 v[3:4], v[5:6] offset:-16 +// GFX9: global_atomic_dec_x2 v[3:4], v[5:6] offset:-16 ; encoding: [0xf0,0x9f,0xb0,0xdd,0x03,0x05,0x00,0x00] +// VI-ERR: :43: error: not a valid operand