diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1590,6 +1590,7 @@
 }
 
 bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
   MachineBasicBlock &MBB = *MI.getParent();
   DebugLoc DL = MBB.findDebugLoc(MI);
   switch (MI.getOpcode()) {
@@ -1675,8 +1676,8 @@
   case AMDGPU::V_SET_INACTIVE_B32: {
     unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
     unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
-      .addReg(Exec);
+    auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+    FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
     BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
       .add(MI.getOperand(2));
     BuildMI(MBB, MI, DL, get(NotOpc), Exec)
@@ -1687,8 +1688,8 @@
   case AMDGPU::V_SET_INACTIVE_B64: {
     unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
     unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
-    BuildMI(MBB, MI, DL, get(NotOpc), Exec)
-      .addReg(Exec);
+    auto FirstNot = BuildMI(MBB, MI, DL, get(NotOpc), Exec).addReg(Exec);
+    FirstNot->addRegisterDead(AMDGPU::SCC, TRI); // SCC is overwritten
     MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
                                  MI.getOperand(0).getReg())
       .add(MI.getOperand(2));
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -145,6 +145,7 @@
 
 // Invert the exec mask and overwrite the inactive lanes of dst with inactive,
 // restoring it after we're done.
+let Defs = [SCC] in {
 def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
   (ins VGPR_32: $src, VSrc_b32:$inactive),
   [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
@@ -156,6 +157,7 @@
   [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
   let Constraints = "$src = $vdst";
 }
+} // End Defs = [SCC]
 
 let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
 def V_ADD_U64_PSEUDO : VPseudoInstSI <
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -1 +1,103 @@
-; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.set.inactive.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.set.inactive.ll
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) {
+; GCN-LABEL: set_inactive:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v0, 42
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
+  %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+  store i32 %tmp, i32 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
+; GCN-LABEL: set_inactive_64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_mov_b32_e32 v0, s2
+; GCN-NEXT:    v_mov_b32_e32 v1, s3
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v3, s1
+; GCN-NEXT:    v_mov_b32_e32 v2, s0
+; GCN-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
+; GCN-NEXT:    s_endpgm
+  %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
+  store i64 %tmp, i64 addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
+; GCN-LABEL: set_inactive_scc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_buffer_load_dword s2, s[4:7], 0x0
+; GCN-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
+; GCN-NEXT:    s_load_dword s0, s[0:1], 0x2c
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_cmp_lg_u32 s2, 56
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    s_cselect_b32 s0, 1, 0
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    v_mov_b32_e32 v0, 42
+; GCN-NEXT:    s_not_b64 exec, exec
+; GCN-NEXT:    s_and_b32 s0, s0, 1
+; GCN-NEXT:    s_cmp_lg_u32 s0, 0
+; GCN-NEXT:    s_cbranch_scc0 BB2_2
+; GCN-NEXT:  ; %bb.1: ; %.one
+; GCN-NEXT:    v_add_u32_e32 v1, vcc, 1, v0
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_mov_b32 s0, 0
+; GCN-NEXT:    buffer_store_dword v1, off, s[4:7], 0
+; GCN-NEXT:    s_branch BB2_3
+; GCN-NEXT:  BB2_2:
+; GCN-NEXT:    s_mov_b32 s0, -1
+; GCN-NEXT:  BB2_3: ; %Flow
+; GCN-NEXT:    s_xor_b32 s0, s0, -1
+; GCN-NEXT:    s_and_b32 s0, s0, 1
+; GCN-NEXT:    s_cmp_lg_u32 s0, 0
+; GCN-NEXT:    s_cbranch_scc1 BB2_5
+; GCN-NEXT:  ; %bb.4: ; %.zero
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT:  BB2_5: ; %.exit
+; GCN-NEXT:    s_endpgm
+  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
+  %cmp = icmp eq i32 %val, 56
+  %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+  br i1 %cmp, label %.zero, label %.one
+
+.zero:
+  store i32 %tmp, i32 addrspace(1)* %out
+  br label %.exit
+
+.one:
+  %tmp.1 = add i32 %tmp, 1
+  store i32 %tmp.1, i32 addrspace(1)* %out
+  br label %.exit
+
+.exit:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
+declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
+
+attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -23,7 +23,31 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}set_inactive_scc:
+; GCN: s_cmp
+; GCN-NOT: s_not
+; GCN: s_cbranch_scc
+define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) {
+  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
+  %cmp = icmp eq i32 %val, 56
+  %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
+  br i1 %cmp, label %.zero, label %.one
+
+.zero:
+  store i32 %tmp, i32 addrspace(1)* %out
+  br label %.exit
+
+.one:
+  %tmp.1 = add i32 %tmp, 1
+  store i32 %tmp.1, i32 addrspace(1)* %out
+  br label %.exit
+
+.exit:
+  ret void
+}
+
 declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
 declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
 
 attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.mir b/llvm/test/CodeGen/AMDGPU/wqm.mir
--- a/llvm/test/CodeGen/AMDGPU/wqm.mir
+++ b/llvm/test/CodeGen/AMDGPU/wqm.mir
@@ -99,7 +99,7 @@
     %8:vreg_64 = BUFFER_LOAD_DWORDX2_OFFSET %6, %7, 0, 0, 0, 0, 0, 0, implicit $exec
     %16:vgpr_32 = COPY %8.sub1
     %11:vgpr_32 = COPY %16
-    %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec
+    %10:vgpr_32 = V_SET_INACTIVE_B32 %11, undef %12:sreg_32, implicit $exec, implicit-def $scc
     %14:vgpr_32 = COPY %7
     %13:vgpr_32 = V_MOV_B32_dpp %14, killed %10, 323, 12, 15, 0, implicit $exec
     early-clobber %15:vgpr_32 = WWM killed %13, implicit $exec