Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -294,6 +294,25 @@
 def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
 def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
 
+// DS f32 atomic intrinsics taking an addrspace(3) float pointer and a float
+// operand. The "rtn" forms produce a float result; the NORET forms produce none.
+class AMDGPUAtomicF32Intrin : Intrinsic<[llvm_float_ty],
+  [LLVMQualPointerType<llvm_float_ty, 3>, llvm_float_ty],
+  [IntrArgMemOnly, NoCapture<0>]
+>;
+
+class AMDGPUAtomicF32IntrinNORET : Intrinsic<[],
+  [LLVMQualPointerType<llvm_float_ty, 3>, llvm_float_ty],
+  [IntrArgMemOnly, NoCapture<0>]
+>;
+
+def int_amdgcn_ds_add_rtn_f32 : AMDGPUAtomicF32Intrin;
+def int_amdgcn_ds_min_rtn_f32 : AMDGPUAtomicF32Intrin;
+def int_amdgcn_ds_max_rtn_f32 : AMDGPUAtomicF32Intrin;
+def int_amdgcn_ds_add_f32 : AMDGPUAtomicF32IntrinNORET;
+def int_amdgcn_ds_min_f32 : AMDGPUAtomicF32IntrinNORET;
+def int_amdgcn_ds_max_f32 : AMDGPUAtomicF32IntrinNORET;
+
 class AMDGPUImageLoad : Intrinsic <
   [llvm_anyfloat_ty], // vdata(VGPR)
   [llvm_anyint_ty,    // vaddr(VGPR)
Index: lib/Target/AMDGPU/DSInstructions.td
===================================================================
--- lib/Target/AMDGPU/DSInstructions.td
+++ lib/Target/AMDGPU/DSInstructions.td
@@ -275,7 +275,6 @@
 def DS_AND_B32        : DS_1A1D_NORET<"ds_and_b32">;
 def DS_OR_B32         : DS_1A1D_NORET<"ds_or_b32">;
 def DS_XOR_B32        : DS_1A1D_NORET<"ds_xor_b32">;
-def DS_ADD_F32        : DS_1A1D_NORET<"ds_add_f32">;
 def DS_MIN_F32        : DS_1A1D_NORET<"ds_min_f32">;
 def DS_MAX_F32        : DS_1A1D_NORET<"ds_max_f32">;
 
@@ -326,8 +325,6 @@
 
 def DS_ADD_RTN_U32    : DS_1A1D_RET<"ds_add_rtn_u32">,
                         AtomicNoRet<"ds_add_u32", 1>;
-def DS_ADD_RTN_F32    : DS_1A1D_RET<"ds_add_rtn_f32">,
-                        AtomicNoRet<"ds_add_f32", 1>;
 def DS_SUB_RTN_U32    : DS_1A1D_RET<"ds_sub_rtn_u32">,
                         AtomicNoRet<"ds_sub_u32", 1>;
 def DS_RSUB_RTN_U32   : DS_1A1D_RET<"ds_rsub_rtn_u32">,
@@ -525,6 +522,10 @@
                         int_amdgcn_ds_bpermute>;
 }
 
+def DS_ADD_RTN_F32 : DS_1A1D_RET<"ds_add_rtn_f32">,
+                     AtomicNoRet<"ds_add_f32", 1>;
+def DS_ADD_F32     : DS_1A1D_NORET<"ds_add_f32">;
+
 } // let SubtargetPredicate = isVI
 
 //===----------------------------------------------------------------------===//
@@ -615,6 +616,24 @@
 def : DSAtomicRetPat;
 def : DSAtomicCmpXChg;
 
+// Selects the <NAME>_rtn_f32 / <NAME>_f32 intrinsics to the <op>_RTN_F32 /
+// <op>_F32 instructions. The offset matched by DS1Addr1Offset is forwarded to
+// the instruction's offset operand rather than being replaced by a constant 0.
+multiclass DSAtomicPatF32<string op> {
+  def : Pat <
+    (!cast<Intrinsic>(NAME#"_rtn_f32") (DS1Addr1Offset i32:$ptr, i32:$offset), f32:$value),
+    (!cast<Instruction>(op#"_RTN_F32") $ptr, $value, (as_i16imm $offset), (i1 0))
+  >;
+  def : Pat <
+    (!cast<Intrinsic>(NAME#"_f32") (DS1Addr1Offset i32:$ptr, i32:$offset), f32:$value),
+    (!cast<Instruction>(op#"_F32") $ptr, $value, (as_i16imm $offset), (i1 0))
+  >;
+}
+
+defm int_amdgcn_ds_add : DSAtomicPatF32<"DS_ADD">;
+defm int_amdgcn_ds_min : DSAtomicPatF32<"DS_MIN">;
+defm int_amdgcn_ds_max : DSAtomicPatF32<"DS_MAX">;
+
 // 64-bit atomics.
 def : DSAtomicRetPat;
 def : DSAtomicRetPat;
Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.f32.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.f32.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+declare float @llvm.amdgcn.ds.add.rtn.f32(float addrspace(3)*, float) #0
+declare void @llvm.amdgcn.ds.add.f32(float addrspace(3)*, float) #0
+declare float @llvm.amdgcn.ds.min.rtn.f32(float addrspace(3)*, float) #0
+declare void @llvm.amdgcn.ds.min.f32(float addrspace(3)*, float) #0
+declare float @llvm.amdgcn.ds.max.rtn.f32(float addrspace(3)*, float) #0
+declare void @llvm.amdgcn.ds.max.f32(float addrspace(3)*, float) #0
+
+; CHECK-LABEL: {{^}}ds_f32:
+; CHECK: ds_add_f32 v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ds_add_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ds_min_f32 v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ds_min_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ds_max_f32 v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ds_max_rtn_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_kernel void @ds_f32(float addrspace(1)* %out, float addrspace(3)* %src1, float %src2) nounwind {
+  call void @llvm.amdgcn.ds.add.f32(float addrspace(3)* %src1, float %src2)
+  %res = call float @llvm.amdgcn.ds.add.rtn.f32(float addrspace(3)* %src1, float %src2)
+  call void @llvm.amdgcn.ds.min.f32(float addrspace(3)* %src1, float %res)
+  %res2 = call float @llvm.amdgcn.ds.min.rtn.f32(float addrspace(3)* %src1, float %res)
+  call void @llvm.amdgcn.ds.max.f32(float addrspace(3)* %src1, float %res2)
+  %res3 = call float @llvm.amdgcn.ds.max.rtn.f32(float addrspace(3)* %src1, float %res2)
+  store float %res3, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind argmemonly }