diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -464,7 +464,6 @@ string opName, RegisterClass vdst_rc, ValueType vt, - SDPatternOperator atomic = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, bit isFP = isFloatType.ret, @@ -483,11 +482,9 @@ def _RTN : FLAT_AtomicRet_Pseudo .ret:$vdst), (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), - " $vdst, $vaddr, $vdata$offset$cpol", - [(set vt:$vdst, - (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>, - GlobalSaddrTable<0, opName#"_rtn">, - AtomicNoRet { + " $vdst, $vaddr, $vdata$offset$cpol">, + GlobalSaddrTable<0, opName#"_rtn">, + AtomicNoRet { let FPAtomic = isFP; let AddedComplexity = -1; // Prefer global atomics if available } @@ -530,7 +527,6 @@ string opName, RegisterClass vdst_rc, ValueType vt, - SDPatternOperator atomic = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc, bit isFP = isFloatType.ret, @@ -540,11 +536,9 @@ def _RTN : FLAT_AtomicRet_Pseudo , - GlobalSaddrTable<0, opName#"_rtn">, - AtomicNoRet { + " $vdst, $vaddr, $vdata, off$offset$cpol">, + GlobalSaddrTable<0, opName#"_rtn">, + AtomicNoRet { let has_saddr = 1; let FPAtomic = isFP; } @@ -566,12 +560,11 @@ string opName, RegisterClass vdst_rc, ValueType vt, - SDPatternOperator atomic_rtn = null_frag, ValueType data_vt = vt, RegisterClass data_rc = vdst_rc> { let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN; - defm "" : FLAT_Global_Atomic_Pseudo_RTN; + defm "" : FLAT_Global_Atomic_Pseudo_RTN; } } @@ -608,93 +601,91 @@ } defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", - VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32, - v2i32, VReg_64>; + VGPR_32, i32, v2i32, VReg_64>; defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", - VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64, - v2i64, VReg_128>; + VReg_64, i64, v2i64, VReg_128>; defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", - VGPR_32, i32, atomic_swap_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", - VReg_64, i64, atomic_swap_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", - VGPR_32, i32, atomic_load_add_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", - VGPR_32, i32, atomic_load_sub_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", - VGPR_32, i32, atomic_load_min_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", - VGPR_32, i32, atomic_load_umin_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", - VGPR_32, i32, atomic_load_max_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", - VGPR_32, i32, atomic_load_umax_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", - VGPR_32, i32, atomic_load_and_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", - VGPR_32, i32, atomic_load_or_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", - VGPR_32, i32, atomic_load_xor_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", - VGPR_32, i32, atomic_inc_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", - VGPR_32, i32, atomic_dec_flat_32>; + VGPR_32, i32>; defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", - VReg_64, i64, atomic_load_add_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", - VReg_64, i64, atomic_load_sub_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", - VReg_64, i64, atomic_load_min_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", - VReg_64, i64, atomic_load_umin_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", - VReg_64, i64, atomic_load_max_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", - VReg_64, i64, atomic_load_umax_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", - VReg_64, i64, atomic_load_and_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", - VReg_64, i64, atomic_load_or_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", - VReg_64, i64, atomic_load_xor_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", - VReg_64, i64, atomic_inc_flat_64>; + VReg_64, i64>; defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", - VReg_64, i64, atomic_dec_flat_64>; + VReg_64, i64>; // GFX7-, GFX10-only flat instructions. let SubtargetPredicate = isGFX7GFX10 in { defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", - VGPR_32, f32, null_frag, v2f32, VReg_64>; + VGPR_32, f32, v2f32, VReg_64>; defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", - VReg_64, f64, null_frag, v2f64, VReg_128>; + VReg_64, f64, v2f64, VReg_128>; defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", VGPR_32, f32>; @@ -758,88 +749,86 @@ let is_flat_global = 1 in { defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", - VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, - v2i32, VReg_64>; + VGPR_32, i32, v2i32, VReg_64>; defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", - VReg_64, i64, AMDGPUatomic_cmp_swap_global_64, - v2i64, VReg_128>; + VReg_64, i64, v2i64, VReg_128>; defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", - VGPR_32, i32, atomic_swap_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", - VReg_64, i64, atomic_swap_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", - VGPR_32, i32, atomic_load_add_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", - VGPR_32, i32, atomic_load_sub_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", - VGPR_32, i32, atomic_load_min_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", - VGPR_32, i32, atomic_load_umin_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", - VGPR_32, i32, atomic_load_max_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", - VGPR_32, i32, atomic_load_umax_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", - VGPR_32, i32, atomic_load_and_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", - VGPR_32, i32, atomic_load_or_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", - VGPR_32, i32, atomic_load_xor_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", - VGPR_32, i32, atomic_inc_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", - VGPR_32, i32, atomic_dec_global_32>; + VGPR_32, i32>; defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", - VReg_64, i64, atomic_load_add_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", - VReg_64, i64, atomic_load_sub_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", - VReg_64, i64, atomic_load_min_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", - VReg_64, i64, atomic_load_umin_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", - VReg_64, i64, atomic_load_max_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", - VReg_64, i64, atomic_load_umax_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", - VReg_64, i64, atomic_load_and_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", - VReg_64, i64, atomic_load_or_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", - VReg_64, i64, atomic_load_xor_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", - VReg_64, i64, atomic_inc_global_64>; + VReg_64, i64>; defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", - VReg_64, i64, atomic_dec_global_64>; + VReg_64, i64>; let SubtargetPredicate = HasGFX10_BEncoding in defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", - VGPR_32, i32, int_amdgcn_global_atomic_csub>; + VGPR_32, i32>; let SubtargetPredicate = isGFX940Plus in { @@ -895,13 +884,13 @@ let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { defm GLOBAL_ATOMIC_FCMPSWAP : - FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; defm GLOBAL_ATOMIC_FCMPSWAP_X2 : - FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>; + FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; defm GLOBAL_ATOMIC_FMAX_X2 :