Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3474,6 +3474,19 @@
     // amdgcn.kill(i1 1) is a no-op
     return eraseInstFromFunction(CI);
   }
+  case Intrinsic::amdgcn_update_dpp: {
+    // When bound_ctrl is set and row mask = bank mask = 0xf the old value can
+    // be omitted, so swap it for undef (nothing to do if it already is undef).
+    auto HasFullMask = [II](unsigned Idx) {
+      return cast<ConstantInt>(II->getArgOperand(Idx))->getZExtValue() == 0xF;
+    };
+    Value *PrevVal = II->getArgOperand(0);
+    if (cast<Constant>(II->getArgOperand(5))->isZeroValue() ||
+        !HasFullMask(3) || !HasFullMask(4) || isa<UndefValue>(PrevVal))
+      break;
+    II->setOperand(0, UndefValue::get(PrevVal->getType()));
+    return II;
+  }
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
Index: test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -1,13 +1,64 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
-; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GCN-NOOPT,GFX8 %s
+; RUN: opt -S -O1 < %s | FileCheck --check-prefix=OPT %s
+; RUN: opt -S -O1 < %s | llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck --check-prefixes=GCN,GCN-OPT,GFX8 %s
 
-; VI-LABEL: {{^}}dpp_test:
-; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
-; VI: v_mov_b32_e32 v1, s{{[0-9]+}}
-; VI: s_nop 1
-; VI: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
+; GCN-LABEL: {{^}}dpp_test:
+; OPT-LABEL: {{^}}define amdgpu_kernel void @dpp_test(
+; GCN: s_load_dword [[SDST:s[0-9]+]], s[0:1], 0x2c
+; GCN: s_load_dword [[SSRC:s[0-9]+]], s[0:1], 0x30
+; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], [[SDST]]
+; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], [[SSRC]]
+; GCN: s_nop 1
+; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
+; OPT: @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false)
 define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
-  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 1) #0
+  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_test_bc:
+; OPT-LABEL: {{^}}define amdgpu_kernel void @dpp_test_bc(
+; GCN: s_load_dword [[SDST:s[0-9]+]], s[0:1], 0x2c
+; GCN: s_load_dword [[SSRC:s[0-9]+]], s[0:1], 0x30
+; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], [[SDST]]
+; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], [[SSRC]]
+; GCN: s_nop 1
+; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0{{$}}
+; OPT: @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 true)
+define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_noold_test:
+; OPT-LABEL: {{^}}define amdgpu_kernel void @dpp_noold_test(
+; GCN-NOOPT:   s_load_dword [[SDST:s[0-9]+]], s[0:1], 0x2c
+; GCN:         s_load_dword [[SSRC:s[0-9]+]], s[0:1], 0x30
+; GCN-NOOPT:   v_mov_b32_e32 [[DST:v[0-9]+]], [[SDST]]
+; GCN-OPT-NOT: v_mov_b32_e32
+; GCN:         v_mov_b32_e32 [[SRC:v[0-9]+]], [[SSRC]]
+; GCN:         s_nop 1
+; GCN-NOOPT:   v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0{{$}}
+; GCN-OPT:     v_mov_b32_dpp v{{[0-9]+}}, [[SRC]] quad_perm:[3,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0{{$}}
+; OPT:         @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in2, i32 3, i32 15, i32 15, i1 true)
+define amdgpu_kernel void @dpp_noold_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1) #0
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}dpp_undef_old_test:
+; OPT-LABEL: {{^}}define amdgpu_kernel void @dpp_undef_old_test(
+; GCN:     s_load_dword [[SSRC:s[0-9]+]], s[0:1], 0x2c
+; GCN-NOT: v_mov_b32_e32
+; GCN:     v_mov_b32_e32 [[SRC:v[0-9]+]], [[SSRC]]
+; GCN:     s_nop 1
+; GCN:     v_mov_b32_dpp v{{[0-9]+}}, [[SRC]] quad_perm:[0,1,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0{{$}}
+; OPT:     @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 true)
+define amdgpu_kernel void @dpp_undef_old_test(i32 addrspace(1)* %out, i32 %in1) {
+  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1) #0
   store i32 %tmp0, i32 addrspace(1)* %out
   ret void
 }