// Patch summary (reviewer preamble; sits before the first "diff --git" header).
//
// Purpose: gate flat_atomic_add_f32 on a dedicated subtarget feature instead of
// the combined isGFX940GFX11Plus predicate.
//
// llvm/lib/Target/AMDGPU/AMDGPU.td
//   * Adds FeatureFlatAtomicFaddF32Inst ("flat-atomic-fadd-f32-inst"), which
//     sets HasFlatAtomicFaddF32Inst to "true".
//   * Appends the new feature to two existing feature lists (hunks at -1142 and
//     -1281). The names of those lists are not visible in the hunks.
//   * Deletes the isGFX940GFX11Plus predicate.
//   * Adds the HasFlatAtomicFaddF32Inst Predicate / AssemblerPredicate, backed
//     by Subtarget->hasFlatAtomicFaddF32Inst().
//
// llvm/lib/Target/AMDGPU/FLATInstructions.td
//   * FLAT_ATOMIC_ADD_F32 now uses SubtargetPredicate = HasFlatAtomicFaddF32Inst.
//   * The two f32 FLAT_ATOMIC_ADD_F32 patterns (atomic_load_fadd_flat and
//     int_amdgcn_flat_atomic_fadd) move out of the
//     OtherPredicates = [isGFX940Plus] group into a new
//     OtherPredicates = [HasFlatAtomicFaddF32Inst] group.
//   * NOTE(review): the unchanged context comment "GFX940-, GFX11-only flat
//     instructions." still precedes the re-gated block and may now be stale --
//     confirm whether it should be reworded in the target file.
//
// llvm/lib/Target/AMDGPU/GCNSubtarget.h
//   * Adds the bool HasFlatAtomicFaddF32Inst member (default false) and the
//     hasFlatAtomicFaddF32Inst() const accessor that returns it.
//
// llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
//   * Declares @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32 and adds the kernel
//     flat_atomic_fadd_f32_wrong_subtarget, which uses attribute group #1
//     (target-cpu gfx803 plus +flat-atomic-fadd-f32-inst) and is checked for a
//     flat_atomic_add_f32 instruction.
//
// NOTE(review): in this copy the line breaks inside each hunk appear to have
// been collapsed to single spaces (the @@ headers declare multi-line hunks,
// e.g. "-622,6 +622,13"). The text below is left exactly as received; it will
// presumably need to be re-split into one diff line per source line before
// git apply / patch can consume it -- TODO confirm against the original patch.
//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -622,6 +622,13 @@ [FeatureFlatGlobalInsts] >; +def FeatureFlatAtomicFaddF32Inst + : SubtargetFeature<"flat-atomic-fadd-f32-inst", + "HasFlatAtomicFaddF32Inst", + "true", + "Has flat_atomic_add_f32 instruction" +>; + def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support", "SupportsSRAMECC", "true", @@ -1142,6 +1149,7 @@ FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, FeatureAtomicPkFaddNoRtnInsts, + FeatureFlatAtomicFaddF32Inst, FeatureSupportsSRAMECC, FeaturePackedTID, FeatureArchitectedFlatScratch, @@ -1281,6 +1289,7 @@ FeatureArchitectedFlatScratch, FeatureAtomicFaddRtnInsts, FeatureAtomicFaddNoRtnInsts, + FeatureFlatAtomicFaddF32Inst, FeatureImageInsts, FeaturePackedTID, FeatureVcmpxPermlaneHazard, @@ -1510,11 +1519,6 @@ Predicate<"Subtarget->hasGFX940Insts()">, AssemblerPredicate<(all_of FeatureGFX940Insts)>; -def isGFX940GFX11Plus : - Predicate<"Subtarget->hasGFX940Insts() ||" - "Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, - AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>; - def isGFX8GFX9NotGFX940 : Predicate<"!Subtarget->hasGFX940Insts() &&" "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||" @@ -1751,6 +1755,9 @@ def HasAtomicPkFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">, AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>; +def HasFlatAtomicFaddF32Inst + : Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">, + AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>; def HasDsSrc2Insts : Predicate<"!Subtarget->hasDsSrc2Insts()">, AssemblerPredicate<(all_of FeatureDsSrc2Insts)>; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -728,9 +728,9 @@ } // End 
SubtargetPredicate = isGFX7GFX10GFX11 // GFX940-, GFX11-only flat instructions. -let SubtargetPredicate = isGFX940GFX11Plus in { +let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; -} // End SubtargetPredicate = isGFX940GFX11Plus +} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; @@ -1476,10 +1476,13 @@ defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; } +let OtherPredicates = [HasFlatAtomicFaddF32Inst] in { +defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; +defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>; +} + let OtherPredicates = [isGFX940Plus] in { -defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_flat", v2f16>; -defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>; defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>; defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>; defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -150,6 +150,7 @@ bool HasAtomicFaddRtnInsts = false; bool HasAtomicFaddNoRtnInsts = false; bool HasAtomicPkFaddNoRtnInsts = false; + bool HasFlatAtomicFaddF32Inst = false; bool SupportsSRAMECC = false; // This should not be used directly. 
'TargetID' tracks the dynamic settings @@ -746,6 +747,8 @@ bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } + bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } + bool hasNoSdstCMPX() const { return HasNoSdstCMPX; } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll @@ -5,6 +5,7 @@ declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1) declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float*, float) ; GCN-LABEL: {{^}}buffer_atomic_add_f32: ; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen @@ -99,4 +100,12 @@ ret void } +; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget: +; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} +define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(float* %ptr, float %data) #1 { + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + ret void +} + attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-no-rtn-insts"} +attributes #1 = { "target-cpu"="gfx803" "target-features"="+flat-atomic-fadd-f32-inst"}