Index: llvm/lib/Target/AMDGPU/AMDGPU.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.td +++ llvm/lib/Target/AMDGPU/AMDGPU.td @@ -451,7 +451,8 @@ "HasAtomicFaddInsts", "true", "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, " - "global_atomic_pk_add_f16 instructions" + "global_atomic_pk_add_f16 instructions", + [FeatureFlatGlobalInsts] >; def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support", Index: llvm/lib/Target/AMDGPU/FLATInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/FLATInstructions.td +++ llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -175,7 +175,7 @@ } multiclass FLAT_Global_Load_Pseudo { - let is_flat_global = 1 in { + let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { def "" : FLAT_Load_Pseudo, GlobalSaddrTable<0, opName>; def _SADDR : FLAT_Load_Pseudo, @@ -184,7 +184,7 @@ } multiclass FLAT_Global_Store_Pseudo { - let is_flat_global = 1 in { + let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { def "" : FLAT_Store_Pseudo, GlobalSaddrTable<0, opName>; def _SADDR : FLAT_Store_Pseudo, @@ -369,10 +369,12 @@ SDPatternOperator atomic_rtn = null_frag, SDPatternOperator atomic_no_rtn = null_frag, ValueType data_vt = vt, - RegisterClass data_rc = vdst_rc> : - FLAT_Global_Atomic_Pseudo_NO_RTN, - FLAT_Global_Atomic_Pseudo_RTN; - + RegisterClass data_rc = vdst_rc> { + let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { + defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN; + defm "" : FLAT_Global_Atomic_Pseudo_RTN; + } +} //===----------------------------------------------------------------------===// // Flat Instructions @@ -509,7 +511,6 @@ } // End SubtargetPredicate = isGFX7GFX10 -let SubtargetPredicate = HasFlatGlobalInsts in { defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo 
<"global_load_sbyte", VGPR_32>; defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; @@ -619,7 +620,6 @@ VReg_64, i64, atomic_dec_global_64>; } // End is_flat_global = 1 -} // End SubtargetPredicate = HasFlatGlobalInsts let SubtargetPredicate = HasFlatScratchInsts in { Index: llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll +++ llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll @@ -27,3 +27,19 @@ %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst ret void } + +; Make sure this still selects when the feature is set artificially on an otherwise incorrect subtarget. +; GCN-LABEL: {{^}}global_atomic_fadd_ret_f32_wrong_subtarget: +define amdgpu_kernel void @global_atomic_fadd_ret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 { + %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst + store float %result, float addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}global_atomic_fadd_noret_f32_wrong_subtarget: +define amdgpu_kernel void @global_atomic_fadd_noret_f32_wrong_subtarget(float addrspace(1)* %ptr) #0 { + %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst + ret void +} + +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" } Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll @@ -70,3 +70,14 @@ call void @llvm.amdgcn.global.atomic.fadd.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data) ret void } + +; Make sure this still selects when the feature is set artificially +; on an otherwise incorrect subtarget. 
+; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget: +; GCN: global_atomic_add_f32 v[{{[0-9:]+}}], v{{[0-9]+}}, off +define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 { + call void @llvm.amdgcn.global.atomic.fadd.p1f32.f32(float addrspace(1)* %ptr, float %data) + ret void +} + +attributes #0 = { "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }