diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -542,16 +542,10 @@ } } // End foreach as -// TODO: Add GISelPredicateCode for the ret and noret PatFrags once -// GlobalISelEmitter allows pattern matches where src and dst def count -// mismatch. - multiclass noret_op { - let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return false; }] in { + let HasNoUse = true in def "_noret" : PatFrag<(ops node:$ptr, node:$data), (!cast(NAME) node:$ptr, node:$data)>; - } } defm int_amdgcn_flat_atomic_fadd : noret_op; @@ -565,17 +559,13 @@ defm int_amdgcn_ds_fadd_v2bf16 : noret_op; multiclass noret_binary_atomic_op { - let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return false; }] in { + let HasNoUse = true in defm "_noret" : binary_atomic_op; - } } multiclass noret_ternary_atomic_op { - let PredicateCode = [{ return (SDValue(N, 0).use_empty()); }], - GISelPredicateCode = [{ return false; }] in { + let HasNoUse = true in defm "_noret" : ternary_atomic_op; - } } multiclass binary_atomic_op_all_as { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -200,8 +200,7 @@ (!cast(NAME) node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen)> { - let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; - let GISelPredicateCode = [{ return false; }]; + let HasNoUse = true; } } @@ -242,8 +241,7 @@ (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset, node:$cachepolicy, node:$idxen)> { - let PredicateCode = [{ return SDValue(N, 0).use_empty(); }]; - let GISelPredicateCode = [{ return false; }]; + let HasNoUse = true; } class SDGlobalAtomicNoRtn : SDNode %rsrc, i32 %vindex, i32 0, i32 0) @@ -74,7 +74,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_mov_b32_e32 v2, s8 -; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen glc +; GFX90A-NEXT: buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) @@ -124,7 +124,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_mov_b32_e32 v2, s8 -; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen glc +; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) @@ -174,7 +174,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_mov_b32_e32 v2, s8 -; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen glc +; GFX90A-NEXT: buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) @@ -224,7 +224,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_mov_b32_e32 v2, s8 -; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen glc +; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) @@ -274,7 +274,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] ; GFX90A-NEXT: v_mov_b32_e32 v2, s8 -; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen glc +; GFX90A-NEXT: buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) @@ -322,7 +322,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) @@ -336,7 +336,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_min_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) @@ -350,7 +350,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: global_atomic_max_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) @@ -396,7 +396,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm @@ -444,7 +444,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX90A-NEXT: v_mov_b32_e32 v2, 0 ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc +; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm @@ -636,7 +636,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc +; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm @@ -763,7 +763,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc +; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) @@ -818,7 +818,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc +; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) @@ -844,7 +844,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1] ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc +; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) @@ -871,7 +871,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s4 ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1] -; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v2, v[0:1] +; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) @@ -901,7 +901,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v2, v[0:1] +; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: @@ -918,7 +918,7 @@ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v2, s0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) -; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v2, v[0:1] +; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-flat.mir @@ -377,7 +377,7 @@ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -385,7 +385,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -393,7 +393,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s32_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX11-NEXT: {{ $}} @@ -401,7 +401,7 @@ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -426,7 +426,7 @@ ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -434,7 +434,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -442,7 +442,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: amdgpu_atomic_cmpxchg_s64_flat_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX11-NEXT: {{ $}} @@ -450,7 +450,7 @@ ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX11-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgpu-atomic-cmpxchg-global.mir @@ -534,8 +534,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-NEXT: {{ $}} @@ -548,8 +547,7 @@ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN]].sub0 + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX7-FLAT-NEXT: {{ $}} @@ -557,7 +555,7 @@ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX8-NEXT: {{ $}} @@ -565,7 +563,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX9-NEXT: {{ $}} @@ -573,7 +571,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s32_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -581,7 +579,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_CMPSWAP_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = COPY $vgpr3 @@ -611,8 +609,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX6-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-NEXT: {{ $}} @@ -625,8 +622,7 @@ ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX7-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 - ; GFX7-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) - ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN]].sub0_sub1 + ; GFX7-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_ADDR64 [[REG_SEQUENCE]], [[COPY]], [[REG_SEQUENCE2]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-FLAT-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: {{ $}} @@ -634,7 +630,7 @@ ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX7-FLAT-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX7-FLAT-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX7-FLAT-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-FLAT-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX8-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX8-NEXT: {{ $}} @@ -642,7 +638,7 @@ ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX8-NEXT: [[FLAT_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX8-NEXT: FLAT_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX9-NEXT: {{ $}} @@ -650,7 +646,7 @@ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX9-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: amdgpu_atomic_cmpxchg_s64_global_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5 ; GFX10-NEXT: {{ $}} @@ -658,7 +654,7 @@ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 ; GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0_sub1, [[COPY2]], %subreg.sub2_sub3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_CMPSWAP_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_CMPSWAP_X2_RTN [[COPY]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_CMPSWAP_X2 [[COPY]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-flat.mir @@ -63,25 +63,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 0) @@ -178,13 +178,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -200,13 +200,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -305,13 +305,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -327,13 +327,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -432,13 +432,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -454,13 +454,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -579,7 +579,7 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX9-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -595,7 +595,7 @@ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX10-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -611,7 +611,7 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) ; GFX11-LABEL: name: flat_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -627,7 +627,7 @@ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -694,25 +694,25 @@ ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: flat_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 0) @@ -809,13 +809,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX9-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX9-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX10-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -831,13 +831,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX10-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) ; GFX11-LABEL: name: flat_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) + ; GFX11-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64)) %0:vgpr(p0) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-add-global.mir @@ -81,31 +81,31 @@ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s32) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s32), addrspace 1) @@ -199,7 +199,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -215,25 +215,25 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2047_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2047, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2047, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2047 @@ -339,7 +339,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -355,13 +355,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -377,13 +377,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset2048_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 2048, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 2048, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 2048 @@ -489,7 +489,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -505,13 +505,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -527,13 +527,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4095 @@ -661,7 +661,7 @@ ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 ; GFX6-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4097 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_ADDR64_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], [[S_MOV_B32_2]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX7-NEXT: {{ $}} @@ -677,7 +677,7 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX9-NEXT: {{ $}} @@ -693,7 +693,7 @@ ; GFX9-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX9-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX9-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -709,7 +709,7 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s32_offset4097_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2 ; GFX11-NEXT: {{ $}} @@ -725,7 +725,7 @@ ; GFX11-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX11-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s32), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s32) = COPY $vgpr2 %2:vgpr(s64) = G_CONSTANT i64 4097 @@ -809,31 +809,31 @@ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s64_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_ATOMICRMW_ADD %0, %1 :: (load store seq_cst (s64), addrspace 1) @@ -937,7 +937,7 @@ ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 ; GFX6-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[REG_SEQUENCE]], %subreg.sub2_sub3 - ; GFX6-NEXT: [[BUFFER_ATOMIC_ADD_X2_ADDR64_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_ADDR64_RTN [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX6-NEXT: BUFFER_ATOMIC_ADD_X2_ADDR64 [[COPY1]], [[COPY]], [[REG_SEQUENCE1]], 0, 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX7-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX7-NEXT: {{ $}} @@ -953,13 +953,13 @@ ; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX7-NEXT: %10:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX7-NEXT: [[FLAT_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = FLAT_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) + ; GFX7-NEXT: FLAT_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store seq_cst (s64), addrspace 1) ; GFX9-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX9-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX10-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX10-NEXT: {{ $}} @@ -975,13 +975,13 @@ ; GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec ; GFX10-NEXT: %10:vgpr_32, dead %12:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec ; GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %10, %subreg.sub1 - ; GFX10-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[REG_SEQUENCE1]], [[COPY1]], 0, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX10-NEXT: GLOBAL_ATOMIC_ADD_X2 [[REG_SEQUENCE1]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) ; GFX11-LABEL: name: global_atomicrmw_add_s64_offset4095_nortn ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_X2_RTN:%[0-9]+]]:vreg_64 = GLOBAL_ATOMIC_ADD_X2_RTN [[COPY]], [[COPY1]], 4095, 1, implicit $exec :: (load store seq_cst (s64), addrspace 1) + ; GFX11-NEXT: GLOBAL_ATOMIC_ADD_X2 [[COPY]], [[COPY1]], 4095, 0, implicit $exec :: (load store seq_cst (s64), addrspace 1) %0:vgpr(p1) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = COPY $vgpr2_vgpr3 %2:vgpr(s64) = G_CONSTANT i64 4095 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-local.mir @@ -62,13 +62,13 @@ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX9-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) + ; GFX9-NEXT: DS_ADD_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store seq_cst (s32), addrspace 3) ; GFX6-LABEL: name: atomicrmw_fadd_s32_local_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-atomicrmw-fadd-region.mir @@ -62,13 +62,13 @@ ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX8-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX9-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-NEXT: [[DS_ADD_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_ADD_RTN_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) + ; GFX9-NEXT: DS_ADD_F32 [[COPY]], [[COPY1]], 0, 1, implicit $m0, implicit $exec :: (load store seq_cst (s32), addrspace 2) ; GFX6-LABEL: name: atomicrmw_fadd_s32_region_noret ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll @@ -139,7 +139,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: ds_dec_rtn_u32 v0, v1, v0 +; CI-NEXT: ds_dec_u32 v1, v0 ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_dec_noret_i32: @@ -149,7 +149,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: ds_dec_rtn_u32 v0, v1, v0 +; VI-NEXT: ds_dec_u32 v1, v0 ; VI-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) ret void @@ -163,7 +163,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: ds_dec_rtn_u32 v0, v1, v0 offset:16 +; CI-NEXT: ds_dec_u32 v1, v0 offset:16 ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_dec_noret_i32_offset: @@ -173,7 +173,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: ds_dec_rtn_u32 v0, v1, v0 offset:16 +; VI-NEXT: ds_dec_u32 v1, v0 offset:16 ; VI-NEXT: s_endpgm ; GFX9-LABEL: lds_atomic_dec_noret_i32_offset: ; GFX9: ; %bb.0: @@ -296,7 +296,7 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_dec_noret_i32: @@ -306,7 +306,7 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_endpgm ; GFX9-LABEL: global_atomic_dec_noret_i32: ; GFX9: ; %bb.0: @@ -331,7 +331,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_dec_noret_i32_offset: @@ -343,7 +343,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_endpgm ; GFX9-LABEL: global_atomic_dec_noret_i32_offset: ; GFX9: ; %bb.0: @@ -449,7 +449,7 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_dec_noret_i32_offset_addr64: @@ -464,7 +464,7 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_endpgm ; GFX9-LABEL: global_atomic_dec_noret_i32_offset_addr64: ; GFX9: ; %bb.0: @@ -599,7 +599,7 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_dec_noret_i32: @@ -609,7 +609,7 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_endpgm ; GFX9-LABEL: flat_atomic_dec_noret_i32: ; GFX9: ; %bb.0: @@ -634,7 +634,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_dec_noret_i32_offset: @@ -646,7 +646,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_endpgm ; GFX9-LABEL: flat_atomic_dec_noret_i32_offset: ; GFX9: ; %bb.0: @@ -752,7 +752,7 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_dec v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_dec_noret_i32_offset_addr64: @@ -767,7 +767,7 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: flat_atomic_dec v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_dec v[0:1], v2 ; VI-NEXT: s_endpgm ; GFX9-LABEL: flat_atomic_dec_noret_i32_offset_addr64: ; GFX9: ; %bb.0: @@ -909,7 +909,7 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_dec_noret_i64: @@ -920,7 +920,7 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: flat_atomic_dec_noret_i64: ; GFX9: ; %bb.0: @@ -947,7 +947,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_dec_noret_i64_offset: @@ -960,7 +960,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: flat_atomic_dec_noret_i64_offset: ; GFX9: ; %bb.0: @@ -1071,7 +1071,7 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_dec_noret_i64_offset_addr64: @@ -1087,7 +1087,7 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: flat_atomic_dec_noret_i64_offset_addr64: ; GFX9: ; %bb.0: @@ -1291,7 +1291,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] +; CI-NEXT: ds_dec_u64 v2, v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_dec_noret_i64: @@ -1302,7 +1302,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] +; VI-NEXT: ds_dec_u64 v2, v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: lds_atomic_dec_noret_i64: ; GFX9: ; %bb.0: @@ -1326,7 +1326,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 +; CI-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_dec_noret_i64_offset: @@ -1337,7 +1337,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] offset:32 +; VI-NEXT: ds_dec_u64 v2, v[0:1] offset:32 ; VI-NEXT: s_endpgm ; GFX9-LABEL: lds_atomic_dec_noret_i64_offset: ; GFX9: ; %bb.0: @@ -1468,7 +1468,7 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_dec_noret_i64: @@ -1479,7 +1479,7 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: global_atomic_dec_noret_i64: ; GFX9: ; %bb.0: @@ -1506,7 +1506,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_dec_noret_i64_offset: @@ -1519,7 +1519,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: global_atomic_dec_noret_i64_offset: ; GFX9: ; %bb.0: @@ -1630,7 +1630,7 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_dec_noret_i64_offset_addr64: @@ -1646,7 +1646,7 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_dec_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; GFX9-LABEL: global_atomic_dec_noret_i64_offset_addr64: ; GFX9: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -153,7 +153,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: ds_inc_rtn_u32 v0, v1, v0 +; CI-NEXT: ds_inc_u32 v1, v0 ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_inc_noret_i32: @@ -163,7 +163,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: ds_inc_rtn_u32 v0, v1, v0 +; VI-NEXT: ds_inc_u32 v1, v0 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: lds_atomic_inc_noret_i32: @@ -172,7 +172,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 -; GFX9-NEXT: ds_inc_rtn_u32 v0, v0, v1 +; GFX9-NEXT: ds_inc_u32 v0, v1 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: lds_atomic_inc_noret_i32: @@ -181,7 +181,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: ds_inc_rtn_u32 v0, v0, v1 +; GFX10-NEXT: ds_inc_u32 v0, v1 ; GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) ret void @@ -195,7 +195,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v1, s0 -; CI-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 +; CI-NEXT: ds_inc_u32 v1, v0 offset:16 ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_inc_noret_i32_offset: @@ -205,7 +205,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s0 -; VI-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 +; VI-NEXT: ds_inc_u32 v1, v0 offset:16 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: lds_atomic_inc_noret_i32_offset: @@ -214,7 +214,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 +; GFX9-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: lds_atomic_inc_noret_i32_offset: @@ -223,7 +223,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v1, s0 -; GFX10-NEXT: ds_inc_rtn_u32 v0, v1, v0 offset:16 +; GFX10-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX10-NEXT: s_endpgm %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) @@ -353,7 +353,7 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_inc_noret_i32: @@ -363,7 +363,7 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_inc_noret_i32: @@ -372,7 +372,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_atomic_inc v0, v1, v0, s[0:1] glc +; GFX9-NEXT: global_atomic_inc v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_inc_noret_i32: @@ -381,7 +381,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[0:1] glc +; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) ret void @@ -397,7 +397,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_inc_noret_i32_offset: @@ -409,7 +409,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_inc_noret_i32_offset: @@ -418,7 +418,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_atomic_inc v0, v1, v0, s[0:1] offset:16 glc +; GFX9-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_inc_noret_i32_offset: @@ -427,7 +427,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_atomic_inc v0, v1, v0, s[0:1] offset:16 glc +; GFX10-NEXT: global_atomic_inc v1, v0, s[0:1] offset:16 ; GFX10-NEXT: s_endpgm %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) @@ -520,7 +520,7 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_inc_noret_i32_offset_addr64: @@ -535,7 +535,7 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_inc_noret_i32_offset_addr64: @@ -544,7 +544,7 @@ ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 42 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_atomic_inc v0, v0, v1, s[0:1] offset:20 glc +; GFX9-NEXT: global_atomic_inc v0, v1, s[0:1] offset:20 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_inc_noret_i32_offset_addr64: @@ -553,7 +553,7 @@ ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_atomic_inc v0, v0, v1, s[0:1] offset:20 glc +; GFX10-NEXT: global_atomic_inc v0, v1, s[0:1] offset:20 ; GFX10-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id @@ -773,7 +773,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] +; CI-NEXT: ds_inc_u64 v2, v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_inc_noret_i64: @@ -784,7 +784,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] +; VI-NEXT: ds_inc_u64 v2, v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: lds_atomic_inc_noret_i64: @@ -794,7 +794,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] +; GFX9-NEXT: ds_inc_u64 v2, v[0:1] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: lds_atomic_inc_noret_i64: @@ -804,7 +804,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] +; GFX10-NEXT: ds_inc_u64 v2, v[0:1] ; GFX10-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) ret void @@ -819,7 +819,7 @@ ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 +; CI-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; CI-NEXT: s_endpgm ; ; VI-LABEL: lds_atomic_inc_noret_i64_offset: @@ -830,7 +830,7 @@ ; VI-NEXT: s_mov_b32 m0, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 +; VI-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: lds_atomic_inc_noret_i64_offset: @@ -840,7 +840,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 +; GFX9-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: lds_atomic_inc_noret_i64_offset: @@ -850,7 +850,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: ds_inc_rtn_u64 v[0:1], v2, v[0:1] offset:32 +; GFX10-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX10-NEXT: s_endpgm %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) @@ -989,7 +989,7 @@ ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_inc_noret_i64: @@ -1000,7 +1000,7 @@ ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_inc_noret_i64: @@ -1010,7 +1010,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[0:1] glc +; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_inc_noret_i64: @@ -1020,7 +1020,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[0:1] glc +; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] ; GFX10-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) ret void @@ -1037,7 +1037,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_inc_noret_i64_offset: @@ -1050,7 +1050,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_inc_noret_i64_offset: @@ -1060,7 +1060,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc +; GFX9-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_inc_noret_i64_offset: @@ -1070,7 +1070,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v2, v[0:1], s[0:1] offset:32 glc +; GFX10-NEXT: global_atomic_inc_x2 v2, v[0:1], s[0:1] offset:32 ; GFX10-NEXT: s_endpgm %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) @@ -1168,7 +1168,7 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: global_atomic_inc_noret_i64_offset_addr64: @@ -1184,7 +1184,7 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_inc_noret_i64_offset_addr64: @@ -1194,7 +1194,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_atomic_inc_x2 v[0:1], v0, v[1:2], s[0:1] offset:40 glc +; GFX9-NEXT: global_atomic_inc_x2 v0, v[1:2], s[0:1] offset:40 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: global_atomic_inc_noret_i64_offset_addr64: @@ -1204,7 +1204,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_atomic_inc_x2 v[0:1], v0, v[1:2], s[0:1] offset:40 glc +; GFX10-NEXT: global_atomic_inc_x2 v0, v[1:2], s[0:1] offset:40 ; GFX10-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id @@ -1308,7 +1308,7 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; GCN-NEXT: flat_atomic_inc v[0:1], v2 ; GCN-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) ret void @@ -1324,7 +1324,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v0, s0 ; CI-NEXT: v_mov_b32_e32 v1, s1 -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_inc_noret_i32_offset: @@ -1336,7 +1336,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: flat_atomic_inc_noret_i32_offset: @@ -1346,7 +1346,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 offset:16 glc +; GFX9-NEXT: flat_atomic_inc v[0:1], v2 offset:16 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: flat_atomic_inc_noret_i32_offset: @@ -1358,7 +1358,7 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_endpgm %gep = getelementptr i32, i32* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) @@ -1469,7 +1469,7 @@ ; CI-NEXT: v_add_i32_e32 v0, vcc, 20, v0 ; CI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; CI-NEXT: v_mov_b32_e32 v2, 42 -; CI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; CI-NEXT: flat_atomic_inc v[0:1], v2 ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_inc_noret_i32_offset_addr64: @@ -1484,7 +1484,7 @@ ; VI-NEXT: v_add_u32_e32 v0, vcc, 20, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: v_mov_b32_e32 v2, 42 -; VI-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; VI-NEXT: flat_atomic_inc v[0:1], v2 ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: flat_atomic_inc_noret_i32_offset_addr64: @@ -1497,7 +1497,7 @@ ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_mov_b32_e32 v2, 42 -; GFX9-NEXT: flat_atomic_inc v0, v[0:1], v2 offset:20 glc +; GFX9-NEXT: flat_atomic_inc v[0:1], v2 offset:20 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: flat_atomic_inc_noret_i32_offset_addr64: @@ -1512,7 +1512,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v2, 42 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 20 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-NEXT: flat_atomic_inc v0, v[0:1], v2 glc +; GFX10-NEXT: flat_atomic_inc v[0:1], v2 ; GFX10-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i32, i32* %ptr, i32 %id @@ -1697,7 +1697,7 @@ ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v3, s1 ; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; GCN-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) ret void @@ -1714,7 +1714,7 @@ ; CI-NEXT: s_addc_u32 s1, s1, 0 ; CI-NEXT: v_mov_b32_e32 v3, s1 ; CI-NEXT: v_mov_b32_e32 v2, s0 -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_inc_noret_i64_offset: @@ -1727,7 +1727,7 @@ ; VI-NEXT: s_addc_u32 s1, s1, 0 ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: v_mov_b32_e32 v2, s0 -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: flat_atomic_inc_noret_i64_offset: @@ -1738,7 +1738,7 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc +; GFX9-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] offset:32 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: flat_atomic_inc_noret_i64_offset: @@ -1751,7 +1751,7 @@ ; GFX10-NEXT: s_addc_u32 s1, s1, 0 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 -; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_endpgm %gep = getelementptr i64, i64* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) @@ -1867,7 +1867,7 @@ ; CI-NEXT: v_add_i32_e32 v2, vcc, 40, v2 ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; CI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; CI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; CI-NEXT: s_endpgm ; ; VI-LABEL: flat_atomic_inc_noret_i64_offset_addr64: @@ -1883,7 +1883,7 @@ ; VI-NEXT: v_add_u32_e32 v2, vcc, 40, v2 ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc -; VI-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; VI-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm ; ; GFX9-LABEL: flat_atomic_inc_noret_i64_offset_addr64: @@ -1897,7 +1897,7 @@ ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_add_co_u32_e32 v3, vcc, v3, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc -; GFX9-NEXT: flat_atomic_inc_x2 v[0:1], v[3:4], v[1:2] offset:40 glc +; GFX9-NEXT: flat_atomic_inc_x2 v[3:4], v[1:2] offset:40 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: flat_atomic_inc_noret_i64_offset_addr64: @@ -1913,7 +1913,7 @@ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, 40 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo -; GFX10-NEXT: flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc +; GFX10-NEXT: flat_atomic_inc_x2 v[2:3], v[0:1] ; GFX10-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() %gep.tid = getelementptr i64, i64* %ptr, i32 %id diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll @@ -68,21 +68,21 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: ds_add_f32 v0, v1 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: ds_fadd_f32_ss_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: ds_add_f32 v0, v1 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: ds_fadd_f32_ss_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX10-NEXT: ds_add_f32 v0, v1 ; GFX10-NEXT: s_endpgm %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -94,21 +94,21 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s3 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512 +; GFX8-NEXT: ds_add_f32 v1, v0 offset:512 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: ds_fadd_f32_ss_offset_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512 +; GFX9-NEXT: ds_add_f32 v1, v0 offset:512 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: ds_fadd_f32_ss_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v0, s3 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512 +; GFX10-NEXT: ds_add_f32 v1, v0 offset:512 ; GFX10-NEXT: s_endpgm %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -175,14 +175,14 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX8-NEXT: ds_add_f32 v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ds_fadd_f32_vv_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX9-NEXT: ds_add_f32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -190,7 +190,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: ds_add_rtn_f32 v0, v0, v1 +; GFX10-NEXT: ds_add_f32 v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) @@ -202,14 +202,14 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: ds_add_f32 v0, v1 offset:512 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ds_fadd_f32_vv_offset_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: ds_add_f32 v0, v1 offset:512 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -217,7 +217,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 +; GFX10-NEXT: ds_add_f32 v0, v1 offset:512 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -103,14 +103,14 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: ds_max_f32 v0, v1 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: ds_fmax_f32_ss_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: ds_max_f32 v0, v1 ; GFX9-NEXT: s_endpgm ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn ; GFX8-MIR: bb.1 (%ir-block.0): @@ -121,7 +121,7 @@ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR-NEXT: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn ; GFX9-MIR: bb.1 (%ir-block.0): @@ -131,7 +131,7 @@ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -143,14 +143,14 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s3 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_max_rtn_f32 v0, v1, v0 offset:512 +; GFX8-NEXT: ds_max_f32 v1, v0 offset:512 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0 offset:512 +; GFX9-NEXT: ds_max_f32 v1, v0 offset:512 ; GFX9-NEXT: s_endpgm ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn ; GFX8-MIR: bb.1 (%ir-block.0): @@ -161,7 +161,7 @@ ; GFX8-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 ; GFX8-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR-NEXT: S_ENDPGM 0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): @@ -171,7 +171,7 @@ ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 ; GFX9-MIR-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -260,14 +260,14 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX8-NEXT: ds_max_f32 v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ds_fmax_f32_vv_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 +; GFX9-NEXT: ds_max_f32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn @@ -277,7 +277,7 @@ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR-NEXT: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX9-MIR: bb.1 (%ir-block.0): @@ -285,7 +285,7 @@ ; GFX9-MIR-NEXT: {{ $}} ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -296,14 +296,14 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: ds_max_f32 v0, v1 offset:512 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: ds_max_f32 v0, v1 offset:512 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn @@ -313,7 +313,7 @@ ; GFX8-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX8-MIR-NEXT: $m0 = S_MOV_B32 -1 - ; GFX8-MIR-NEXT: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX8-MIR-NEXT: DS_MAX_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR-NEXT: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): @@ -321,7 +321,7 @@ ; GFX9-MIR-NEXT: {{ $}} ; GFX9-MIR-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) + ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll @@ -68,21 +68,21 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: ds_min_f32 v0, v1 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: ds_fmin_f32_ss_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: ds_min_f32 v0, v1 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: ds_fmin_f32_ss_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX10-NEXT: ds_min_f32 v0, v1 ; GFX10-NEXT: s_endpgm %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void @@ -94,21 +94,21 @@ ; GFX8-NEXT: v_mov_b32_e32 v0, s3 ; GFX8-NEXT: v_mov_b32_e32 v1, s2 ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512 +; GFX8-NEXT: ds_min_f32 v1, v0 offset:512 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: ds_fmin_f32_ss_offset_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, s3 ; GFX9-NEXT: v_mov_b32_e32 v1, s2 -; GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512 +; GFX9-NEXT: ds_min_f32 v1, v0 offset:512 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: ds_fmin_f32_ss_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_mov_b32_e32 v0, s3 ; GFX10-NEXT: v_mov_b32_e32 v1, s2 -; GFX10-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512 +; GFX10-NEXT: ds_min_f32 v1, v0 offset:512 ; GFX10-NEXT: s_endpgm %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) @@ -175,14 +175,14 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX8-NEXT: ds_min_f32 v0, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ds_fmin_f32_vv_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX9-NEXT: ds_min_f32 v0, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -190,7 +190,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 +; GFX10-NEXT: ds_min_f32 v0, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) @@ -202,14 +202,14 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX8-NEXT: ds_min_f32 v0, v1 offset:512 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: ds_fmin_f32_vv_offset_nortn: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX9-NEXT: ds_min_f32 v0, v1 offset:512 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -217,7 +217,7 @@ ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 +; GFX10-NEXT: ds_min_f32 v0, v1 offset:512 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll @@ -13,7 +13,7 @@ ; GFX90A-LABEL: global_atomic_fadd_f32: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off glc +; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) @@ -31,7 +31,7 @@ ; GFX90A-LABEL: global_atomic_fadd_f32_off_2048: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:2048 glc +; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 @@ -50,7 +50,7 @@ ; GFX90A-LABEL: global_atomic_fadd_f32_off_neg2047: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_add_f32 v0, v[0:1], v2, off offset:-2044 glc +; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr float, float addrspace(1)* %ptr, i64 -511 @@ -77,7 +77,7 @@ ; GFX90A-NEXT: v_mov_b32_e32 v1, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v0, s2 -; GFX90A-NEXT: global_atomic_add_f32 v0, v1, v0, s[0:1] offset:2048 glc +; GFX90A-NEXT: global_atomic_add_f32 v1, v0, s[0:1] offset:2048 ; GFX90A-NEXT: s_endpgm %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) @@ -95,7 +95,7 @@ ; GFX90A-LABEL: global_atomic_fadd_v2f16: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc +; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) @@ -113,7 +113,7 @@ ; GFX90A-LABEL: global_atomic_fadd_v2f16_off_neg2047: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX90A-NEXT: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-2044 glc +; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -511 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll @@ -85,7 +85,7 @@ ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -200,7 +200,7 @@ ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll @@ -42,8 +42,7 @@ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -165,8 +164,7 @@ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll @@ -30,7 +30,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -63,7 +63,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0) @@ -95,7 +95,7 @@ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0) ret void @@ -127,7 +127,7 @@ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void @@ -234,7 +234,7 @@ ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -346,7 +346,7 @@ ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY6]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -388,7 +388,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %voffset = add i32 %voffset.base, 4095 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) @@ -423,7 +423,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2) ret void @@ -456,7 +456,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) ret void @@ -487,7 +487,7 @@ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll @@ -93,7 +93,7 @@ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_ADD_X2_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_X2_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -214,7 +214,7 @@ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_ADD_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; CHECK-NEXT: BUFFER_ATOMIC_ADD_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll @@ -47,8 +47,7 @@ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 %ret = call i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -176,8 +175,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY11]], %subreg.sub0, [[COPY12]], %subreg.sub1 ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN]].sub0 + ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE3]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll @@ -34,7 +34,7 @@ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -71,7 +71,7 @@ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %voffset.add = add i32 %voffset, 4095 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0) @@ -105,7 +105,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 4095, i32 %soffset, i32 0) ret void @@ -139,7 +139,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void @@ -252,7 +252,7 @@ ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY8]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -368,7 +368,7 @@ ; GFX90A-NEXT: bb.3: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY7]], [[COPY8]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc ; GFX90A-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -415,7 +415,7 @@ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) ret void @@ -448,7 +448,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 2) ret void @@ -485,7 +485,7 @@ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -518,7 +518,7 @@ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) + ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 4) ; GFX90A-NEXT: S_ENDPGM 0 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll @@ -63,7 +63,7 @@ ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: v_mov_b32_e32 v0, s4 ; G_SI-NEXT: v_mov_b32_e32 v1, s5 -; G_SI-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc +; G_SI-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen ; G_SI-NEXT: s_endpgm ; ; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f32: @@ -73,7 +73,7 @@ ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc +; G_GFX7-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f32: @@ -84,7 +84,7 @@ ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 -; G_GFX10-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 0 offen glc +; G_GFX10-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 0 offen ; G_GFX10-NEXT: s_endpgm ; ; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f32: @@ -95,7 +95,7 @@ ; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX1030-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc +; G_GFX1030-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen ; G_GFX1030-NEXT: s_endpgm main_body: %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) @@ -351,7 +351,7 @@ ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: v_mov_b32_e32 v0, s4 ; G_SI-NEXT: v_mov_b32_e32 v1, s5 -; G_SI-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc +; G_SI-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen ; G_SI-NEXT: s_endpgm ; ; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f32: @@ -361,7 +361,7 @@ ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc +; G_GFX7-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f32: @@ -372,7 +372,7 @@ ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 -; G_GFX10-NEXT: buffer_atomic_fmax v0, v1, s[4:7], 0 offen glc +; G_GFX10-NEXT: buffer_atomic_fmax v0, v1, s[4:7], 0 offen ; G_GFX10-NEXT: s_endpgm ; ; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f32: @@ -383,7 +383,7 @@ ; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX1030-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc +; G_GFX1030-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen ; G_GFX1030-NEXT: s_endpgm main_body: %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-gfx10.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-gfx10.ll --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-gfx10.ll @@ -28,7 +28,7 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v1, 0 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX10-NEXT: global_atomic_fmin v0, v1, v0, s[2:3] glc +; G_GFX10-NEXT: global_atomic_fmin v1, v0, s[2:3] ; G_GFX10-NEXT: s_endpgm main_body: %ret = call float @llvm.amdgcn.global.atomic.fmin.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) @@ -55,7 +55,7 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v1, 0 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX10-NEXT: global_atomic_fmax v0, v1, v0, s[2:3] glc +; G_GFX10-NEXT: global_atomic_fmax v1, v0, s[2:3] ; G_GFX10-NEXT: s_endpgm main_body: %ret = call float @llvm.amdgcn.global.atomic.fmax.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) @@ -122,7 +122,7 @@ ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 -; G_GFX10-NEXT: global_atomic_fmin_x2 v[0:1], v2, v[0:1], s[0:1] glc +; G_GFX10-NEXT: global_atomic_fmin_x2 v2, v[0:1], s[0:1] ; G_GFX10-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) @@ -147,7 +147,7 @@ ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 -; G_GFX10-NEXT: global_atomic_fmax_x2 v[0:1], v2, v[0:1], s[0:1] glc +; G_GFX10-NEXT: global_atomic_fmax_x2 v2, v[0:1], s[0:1] ; G_GFX10-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) diff --git a/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll --- a/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll @@ -73,7 +73,7 @@ ; G_SI-NEXT: v_mov_b32_e32 v0, s4 ; G_SI-NEXT: v_mov_b32_e32 v1, s5 ; G_SI-NEXT: v_mov_b32_e32 v2, s6 -; G_SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc +; G_SI-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen ; G_SI-NEXT: s_endpgm ; ; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f64: @@ -85,7 +85,7 @@ ; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 ; G_GFX7-NEXT: v_mov_b32_e32 v2, s6 -; G_GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc +; G_GFX7-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f64: @@ -98,7 +98,7 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX10-NEXT: v_mov_b32_e32 v2, s8 -; G_GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[4:7], 0 offen glc +; G_GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[4:7], 0 offen ; G_GFX10-NEXT: s_endpgm ; ; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f64: @@ -111,7 +111,7 @@ ; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5 ; G_GFX1030-NEXT: v_mov_b32_e32 v2, s6 -; G_GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc +; G_GFX1030-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen ; G_GFX1030-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) @@ -310,7 +310,7 @@ ; G_SI-NEXT: v_mov_b32_e32 v0, s4 ; G_SI-NEXT: v_mov_b32_e32 v1, s5 ; G_SI-NEXT: v_mov_b32_e32 v2, s6 -; G_SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc +; G_SI-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen ; G_SI-NEXT: s_endpgm ; ; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f64: @@ -322,7 +322,7 @@ ; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 ; G_GFX7-NEXT: v_mov_b32_e32 v2, s6 -; G_GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc +; G_GFX7-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f64: @@ -335,7 +335,7 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX10-NEXT: v_mov_b32_e32 v2, s8 -; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[4:7], 0 offen glc +; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[4:7], 0 offen ; G_GFX10-NEXT: s_endpgm ; ; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f64: @@ -348,7 +348,7 @@ ; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4 ; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5 ; G_GFX1030-NEXT: v_mov_b32_e32 v2, s6 -; G_GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc +; G_GFX1030-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen ; G_GFX1030-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll --- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll @@ -175,7 +175,7 @@ ; G_SI-NEXT: ds_min_rtn_f32 v1, v1, v0 ; G_SI-NEXT: s_lshl_b32 s1, s2, 4 ; G_SI-NEXT: v_mov_b32_e32 v2, s1 -; G_SI-NEXT: ds_min_rtn_f32 v0, v2, v0 +; G_SI-NEXT: ds_min_f32 v2, v0 ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: v_mov_b32_e32 v0, s3 ; G_SI-NEXT: ds_min_rtn_f32 v0, v0, v1 @@ -203,9 +203,9 @@ ; G_GFX7-NEXT: ds_min_rtn_f32 v1, v1, v0 ; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 -; G_GFX7-NEXT: ds_min_rtn_f32 v0, v2, v0 -; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX7-NEXT: ds_min_f32 v2, v0 ; G_GFX7-NEXT: v_mov_b32_e32 v0, s1 +; G_GFX7-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX7-NEXT: ds_min_rtn_f32 v0, v0, v1 ; G_GFX7-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -231,9 +231,9 @@ ; G_VI-NEXT: ds_min_rtn_f32 v1, v1, v0 ; G_VI-NEXT: s_lshl_b32 s2, s2, 4 ; G_VI-NEXT: v_mov_b32_e32 v2, s2 -; G_VI-NEXT: ds_min_rtn_f32 v0, v2, v0 -; G_VI-NEXT: s_waitcnt lgkmcnt(0) +; G_VI-NEXT: ds_min_f32 v2, v0 ; G_VI-NEXT: v_mov_b32_e32 v0, s1 +; G_VI-NEXT: s_waitcnt lgkmcnt(1) ; G_VI-NEXT: ds_min_rtn_f32 v0, v0, v1 ; G_VI-NEXT: v_mov_b32_e32 v1, s0 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -258,9 +258,9 @@ ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 -; G_GFX9-NEXT: ds_min_rtn_f32 v1, v2, v1 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: ds_min_f32 v2, v1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 ; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -286,10 +286,9 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v3, s1 ; G_GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 -; G_GFX10-NEXT: ds_min_rtn_f32 v1, v2, v1 +; G_GFX10-NEXT: ds_min_f32 v2, v1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX10-NEXT: ds_min_rtn_f32 v0, v3, v0 -; G_GFX10-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX10-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen @@ -464,7 +463,7 @@ ; G_SI-NEXT: ds_max_rtn_f32 v1, v1, v0 ; G_SI-NEXT: s_lshl_b32 s1, s2, 4 ; G_SI-NEXT: v_mov_b32_e32 v2, s1 -; G_SI-NEXT: ds_max_rtn_f32 v0, v2, v0 +; G_SI-NEXT: ds_max_f32 v2, v0 ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: v_mov_b32_e32 v0, s3 ; G_SI-NEXT: ds_max_rtn_f32 v0, v0, v1 @@ -492,9 +491,9 @@ ; G_GFX7-NEXT: ds_max_rtn_f32 v1, v1, v0 ; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 -; G_GFX7-NEXT: ds_max_rtn_f32 v0, v2, v0 -; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX7-NEXT: ds_max_f32 v2, v0 ; G_GFX7-NEXT: v_mov_b32_e32 v0, s1 +; G_GFX7-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX7-NEXT: ds_max_rtn_f32 v0, v0, v1 ; G_GFX7-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -520,9 +519,9 @@ ; G_VI-NEXT: ds_max_rtn_f32 v1, v1, v0 ; G_VI-NEXT: s_lshl_b32 s2, s2, 4 ; G_VI-NEXT: v_mov_b32_e32 v2, s2 -; G_VI-NEXT: ds_max_rtn_f32 v0, v2, v0 -; G_VI-NEXT: s_waitcnt lgkmcnt(0) +; G_VI-NEXT: ds_max_f32 v2, v0 ; G_VI-NEXT: v_mov_b32_e32 v0, s1 +; G_VI-NEXT: s_waitcnt lgkmcnt(1) ; G_VI-NEXT: ds_max_rtn_f32 v0, v0, v1 ; G_VI-NEXT: v_mov_b32_e32 v1, s0 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -547,9 +546,9 @@ ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 -; G_GFX9-NEXT: ds_max_rtn_f32 v1, v2, v1 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: ds_max_f32 v2, v1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0 ; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -575,10 +574,9 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: v_mov_b32_e32 v3, s1 ; G_GFX10-NEXT: ds_max_rtn_f32 v0, v0, v1 -; G_GFX10-NEXT: ds_max_rtn_f32 v1, v2, v1 +; G_GFX10-NEXT: ds_max_f32 v2, v1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX10-NEXT: ds_max_rtn_f32 v0, v3, v0 -; G_GFX10-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX10-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v1, s[4:7], 0 offen @@ -783,7 +781,7 @@ ; G_SI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_SI-NEXT: s_lshl_b32 s1, s4, 4 ; G_SI-NEXT: v_mov_b32_e32 v4, s1 -; G_SI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] +; G_SI-NEXT: ds_min_f64 v4, v[0:1] ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: v_mov_b32_e32 v0, s5 ; G_SI-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] @@ -818,9 +816,9 @@ ; G_GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 -; G_GFX7-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] -; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX7-NEXT: ds_min_f64 v4, v[0:1] ; G_GFX7-NEXT: v_mov_b32_e32 v0, s1 +; G_GFX7-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX7-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] ; G_GFX7-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX7-NEXT: s_add_u32 s0, s0, 4 @@ -852,9 +850,9 @@ ; G_VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 -; G_VI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] -; G_VI-NEXT: s_waitcnt lgkmcnt(0) +; G_VI-NEXT: ds_min_f64 v4, v[0:1] ; G_VI-NEXT: v_mov_b32_e32 v0, s1 +; G_VI-NEXT: s_waitcnt lgkmcnt(1) ; G_VI-NEXT: ds_min_rtn_f64 v[0:1], v0, v[2:3] ; G_VI-NEXT: v_mov_b32_e32 v2, s0 ; G_VI-NEXT: s_add_u32 s0, s0, 4 @@ -886,8 +884,8 @@ ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 ; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 -; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v5, v[0:1] -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: ds_min_f64 v5, v[0:1] +; G_GFX9-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3] ; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -918,8 +916,8 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] -; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX10-NEXT: ds_min_f64 v4, v[0:1] +; G_GFX10-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3] ; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -1126,7 +1124,7 @@ ; G_SI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_SI-NEXT: s_lshl_b32 s1, s4, 4 ; G_SI-NEXT: v_mov_b32_e32 v4, s1 -; G_SI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] +; G_SI-NEXT: ds_max_f64 v4, v[0:1] ; G_SI-NEXT: s_waitcnt lgkmcnt(0) ; G_SI-NEXT: v_mov_b32_e32 v0, s5 ; G_SI-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] @@ -1161,9 +1159,9 @@ ; G_GFX7-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 -; G_GFX7-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] -; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX7-NEXT: ds_max_f64 v4, v[0:1] ; G_GFX7-NEXT: v_mov_b32_e32 v0, s1 +; G_GFX7-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX7-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] ; G_GFX7-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX7-NEXT: s_add_u32 s0, s0, 4 @@ -1195,9 +1193,9 @@ ; G_VI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 -; G_VI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] -; G_VI-NEXT: s_waitcnt lgkmcnt(0) +; G_VI-NEXT: ds_max_f64 v4, v[0:1] ; G_VI-NEXT: v_mov_b32_e32 v0, s1 +; G_VI-NEXT: s_waitcnt lgkmcnt(1) ; G_VI-NEXT: ds_max_rtn_f64 v[0:1], v0, v[2:3] ; G_VI-NEXT: v_mov_b32_e32 v2, s0 ; G_VI-NEXT: s_add_u32 s0, s0, 4 @@ -1229,8 +1227,8 @@ ; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 ; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 -; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v5, v[0:1] -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: ds_max_f64 v5, v[0:1] +; G_GFX9-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v4, v[2:3] ; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1261,8 +1259,8 @@ ; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] -; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX10-NEXT: ds_max_f64 v4, v[0:1] +; G_GFX10-NEXT: s_waitcnt lgkmcnt(1) ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3] ; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)