diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2177,7 +2177,7 @@
 // v_mov_b32 <dest> <old>
 // v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
 def int_amdgcn_update_dpp :
-  Intrinsic<[llvm_anyint_ty],
+  Intrinsic<[llvm_any_ty],
             [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty,
             llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
              [IntrNoMem, IntrConvergent, IntrWillReturn,
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1193,8 +1193,8 @@
                        (as_i1timm $bound_ctrl))
 >;
 
-def : GCNPat <
-  (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl,
+class UpdateDPPPat<ValueType vt> : GCNPat <
+  (vt (int_amdgcn_update_dpp vt:$old, vt:$src, timm:$dpp_ctrl,
                               timm:$row_mask, timm:$bank_mask,
                               timm:$bound_ctrl)),
   (V_MOV_B32_dpp VGPR_32:$old, VGPR_32:$src, (as_i32timm $dpp_ctrl),
@@ -1202,6 +1202,9 @@
                  (as_i1timm $bound_ctrl))
 >;
 
+def : UpdateDPPPat<i32>; // i32 overload: the type the replaced anonymous GCNPat matched
+def : UpdateDPPPat<f32>; // f32 overload: selects the same V_MOV_B32_dpp; NOTE(review): no other value types are instantiated in this hunk
+
 } // End OtherPredicates = [isGFX8Plus]
 
 let OtherPredicates = [isGFX8Plus] in {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -103,9 +103,128 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}dpp_test_f32:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
+define amdgpu_kernel void @dpp_test_f32(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 1, i32 1, i32 1, i1 0) #0 ; operands: old, src, dpp_ctrl=1, row_mask=1, bank_mask=1, bound_ctrl=0 (expected asm carries no bound_ctrl modifier)
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb1:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb1(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 0, i32 0, i32 0, i1 0) #0 ; all-zero controls: expected asm has quad_perm:[0,0,0,0], row_mask 0x0, bank_mask 0x0 and no bound_ctrl modifier
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb2:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb2(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 3, i32 3, i32 3, i1 0) #0 ; dpp_ctrl=3 is expected as quad_perm:[3,0,0,0]; row_mask=3, bank_mask=3, bound_ctrl=0
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb3:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb3(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 1, i32 2, i32 3, i1 1) #0 ; a distinct value per control operand: dpp_ctrl=1, row_mask=2, bank_mask=3, bound_ctrl=1 (expected asm carries bound_ctrl:1)
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb4:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb4(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 4, i32 3, i32 2, i1 1) #0 ; dpp_ctrl=4 is expected to set only the second quad_perm entry: [0,1,0,0]; row_mask=3, bank_mask=2, bound_ctrl=1
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb5:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb5(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 62, i32 61, i1 1) #0 ; dpp_ctrl=63 is expected as quad_perm:[3,3,3,0]; row_mask=62 and bank_mask=61 are expected as 0xe and 0xd, i.e. their low 4 bits
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb6:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb6(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 63, i32 63, i1 1) #0 ; dpp_ctrl=63 is expected as quad_perm:[3,3,3,0]; row_mask=63 and bank_mask=63 are both expected as 0xf, i.e. their low 4 bits
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb7:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb7(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 64, i32 64, i32 64, i1 1) #0 ; dpp_ctrl=64 is expected to set only the fourth quad_perm entry: [0,0,0,1]; row_mask=64 and bank_mask=64 are expected as 0x0 (their low 4 bits are zero)
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_f32_imm_comb8:
+; GCN:  v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
+; GCN:  v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
+; GFX8-OPT: s_mov
+; GFX8-OPT: s_mov
+; GFX8-NOOPT: s_nop 1
+; GCN:  v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
+define amdgpu_kernel void @dpp_test_f32_imm_comb8(ptr addrspace(1) %out, float %in1, float %in2) {
+  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 31, i32 63, i32 128, i1 1) #0 ; dpp_ctrl=31 is expected as quad_perm:[3,3,1,0]; row_mask=63 is expected as 0xf and bank_mask=128 as 0x0 (low 4 bits of each)
+  store float %tmp0, ptr addrspace(1) %out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()
 declare void @llvm.amdgcn.s.barrier()
 declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
+declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) #0 ; f32 overload called by the dpp_test_f32* kernels; operands: old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl
 declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
 
 attributes #0 = { nounwind readnone convergent }