# Patch summary: lets the llvm.amdgcn.update.dpp intrinsic be overloaded on f32.
# - IntrinsicsAMDGPU.td: the result type of int_amdgcn_update_dpp changes from
#   llvm_anyint_ty to llvm_any_ty; the first two operands are LLVMMatchType<0>,
#   so they follow the result type.
# - VOP1Instructions.td: adds a GCNPat mapping the f32 form to V_MOV_B32_dpp with
#   both $old and $src in VGPR_32, placed just before the closing
#   "End OtherPredicates = [isGFX8Plus]" brace.
# - llvm.amdgcn.update.dpp.ll: adds the dpp_test_f32 kernel, its FileCheck lines,
#   and the declaration of @llvm.amdgcn.update.dpp.f32.
# NOTE(review): llvm_any_ty is not limited to f32, and only an f32 pattern is
#   added in this patch; confirm how other newly accepted types are selected or
#   rejected.
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2177,7 +2177,7 @@ // v_mov_b32 // v_mov_b32 def int_amdgcn_update_dpp : - Intrinsic<[llvm_anyint_ty], + Intrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [IntrNoMem, IntrConvergent, IntrWillReturn, diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -1202,6 +1202,15 @@ (as_i1timm $bound_ctrl)) >; +def : GCNPat < + (f32 (int_amdgcn_update_dpp f32:$old, f32:$src, timm:$dpp_ctrl, + timm:$row_mask, timm:$bank_mask, + timm:$bound_ctrl)), + (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl), + (as_i32timm $row_mask), (as_i32timm $bank_mask), + (as_i1timm $bound_ctrl)) +>; + } // End OtherPredicates = [isGFX8Plus] let OtherPredicates = [isGFX8Plus] in { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll @@ -103,9 +103,23 @@ ret void } +; GCN-LABEL: {{^}}dpp_test_f32: +; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}} +; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}} +; GFX8-OPT: s_mov +; GFX8-OPT: s_mov +; GFX8-NOOPT: s_nop 1 +; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}} +define amdgpu_kernel void @dpp_test_f32(ptr addrspace(1) %out, float %in1, float %in2) { + %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 1, i32 1, i32 1, i1 0) #0 + store float %tmp0, ptr addrspace(1) %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() declare void @llvm.amdgcn.s.barrier() declare i32 
@llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0 +declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) #0 declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0 attributes #0 = { nounwind readnone convergent }