diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -566,11 +566,28 @@ "Has v_pk_fmac_f16 instruction" >; -def FeatureAtomicFaddInsts : SubtargetFeature<"atomic-fadd-insts", - "HasAtomicFaddInsts", +def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts", + "HasAtomicFaddRtnInsts", "true", - "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, " - "global_atomic_pk_add_f16 instructions", + "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that " + "return original value", + [FeatureFlatGlobalInsts] +>; + +def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts", + "HasAtomicFaddNoRtnInsts", + "true", + "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that " + "don't return original value", + [FeatureFlatGlobalInsts] +>; + +def FeatureAtomicPkFaddNoRtnInsts + : SubtargetFeature<"atomic-pk-fadd-no-rtn-insts", + "HasAtomicPkFaddNoRtnInsts", + "true", + "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that " + "don't return original value", [FeatureFlatGlobalInsts] >; @@ -988,7 +1005,8 @@ FeatureDot7Insts, FeatureMAIInsts, FeaturePkFmacF16Inst, - FeatureAtomicFaddInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicPkFaddNoRtnInsts, FeatureSupportsSRAMECC, FeatureMFMAInlineLiteralBug, FeatureImageGather4D16Bug]>; @@ -1020,7 +1038,9 @@ FeaturePackedFP32Ops, FeatureMAIInsts, FeaturePkFmacF16Inst, - FeatureAtomicFaddInsts, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicPkFaddNoRtnInsts, FeatureImageInsts, FeatureMadMacF32Insts, FeatureSupportsSRAMECC, @@ -1055,7 +1075,9 @@ FeaturePackedFP32Ops, FeatureMAIInsts, FeaturePkFmacF16Inst, - FeatureAtomicFaddInsts, + FeatureAtomicFaddRtnInsts, + FeatureAtomicFaddNoRtnInsts, + FeatureAtomicPkFaddNoRtnInsts, FeatureSupportsSRAMECC, FeaturePackedTID, FeatureArchitectedFlatScratch, @@ -1544,8 +1566,13 @@ def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">, AssemblerPredicate<(any_of FeatureGFX10_3Insts)>; -def HasAtomicFaddInsts : Predicate<"Subtarget->hasAtomicFaddInsts()">, - AssemblerPredicate<(all_of FeatureAtomicFaddInsts)>; +def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">, + AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>; +def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">, + AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>; +def HasAtomicPkFaddNoRtnInsts + : Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">, + AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>; def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">, AssemblerPredicate<(all_of FeatureDsSrc2Insts)>; diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1119,23 +1119,25 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>; -let SubtargetPredicate = HasAtomicFaddInsts in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN < +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN< "buffer_atomic_add_f32", VGPR_32, f32 >; + +let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16 >; -let OtherPredicates = [isGFX90APlus] in { -defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN < +let OtherPredicates = [HasAtomicFaddRtnInsts] in +defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN< "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_32 >; + +let OtherPredicates = [isGFX90APlus] in defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN < "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_32 >; -} -} // End SubtargetPredicate = HasAtomicFaddInsts //===----------------------------------------------------------------------===// // MTBUF Instructions @@ -1597,10 +1599,14 @@ >; } -let SubtargetPredicate = HasAtomicFaddInsts in { +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in defm : BufferAtomicPatterns_NO_RTN; + +let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in defm : BufferAtomicPatterns_NO_RTN; -} + +let SubtargetPredicate = HasAtomicFaddRtnInsts in + defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32">; let SubtargetPredicate = isGFX90APlus in { defm : BufferAtomicIntrPat<"int_amdgcn_global_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">; @@ -2634,12 +2640,12 @@ def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>; } // End AssemblerPredicate = isGFX8GFX9 -let SubtargetPredicate = HasAtomicFaddInsts in { +let SubtargetPredicate = HasAtomicFaddNoRtnInsts in { defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>; defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>; -} // End SubtargetPredicate = HasAtomicFaddInsts +} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts let SubtargetPredicate = isGFX90APlus in { defm BUFFER_ATOMIC_ADD_F64 : MUBUF_Real_Atomic_vi<0x4f>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -901,23 +901,22 @@ } // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 let is_flat_global = 1 in { -let OtherPredicates = [HasAtomicFaddInsts] in { +let OtherPredicates = [HasAtomicFaddNoRtnInsts] in defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_add_f32", VGPR_32, f32 >; +let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; -} // End OtherPredicates = [HasAtomicFaddInsts] - -let OtherPredicates = [isGFX90APlus] in { +let OtherPredicates = [HasAtomicFaddRtnInsts] in defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_add_f32", VGPR_32, f32 >; +let OtherPredicates = [isGFX90APlus] in defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < "global_atomic_pk_add_f16", VGPR_32, v2f16 >; -} // End OtherPredicates = [isGFX90APlus] } // End is_flat_global = 1 //===----------------------------------------------------------------------===// @@ -1445,10 +1444,10 @@ defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>; } -let OtherPredicates = [HasAtomicFaddInsts] in { +let OtherPredicates = [HasAtomicFaddNoRtnInsts] in defm : GlobalFLATNoRtnAtomicPats ; +let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in defm : GlobalFLATNoRtnAtomicPats ; -} let OtherPredicates = [isGFX90APlus] in { defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -143,7 +143,9 @@ bool HasDot7Insts = false; bool HasMAIInsts = false; bool HasPkFmacF16Inst = false; - bool HasAtomicFaddInsts = false; + bool HasAtomicFaddRtnInsts = false; + bool HasAtomicFaddNoRtnInsts = false; + bool HasAtomicPkFaddNoRtnInsts = false; bool SupportsSRAMECC = false; // This should not be used directly. 'TargetID' tracks the dynamic settings @@ -709,9 +711,15 @@ } bool hasAtomicFaddInsts() const { - return HasAtomicFaddInsts; + return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; } + bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; } + + bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } + + bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } + bool hasNoSdstCMPX() const { return HasNoSdstCMPX; } diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp-wrong-subtarget.ll @@ -23,4 +23,4 @@ ret void } -attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" } +attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll --- a/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -800,5 +800,5 @@ } attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" } -attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" "amdgpu-unsafe-fp-atomics"="true" } +attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts" "amdgpu-unsafe-fp-atomics"="true" } attributes #2 = { "amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll @@ -99,4 +99,4 @@ ret void } -attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" } +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"}