Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -2153,10 +2153,12 @@
 // SOP1 Patterns
 //===----------------------------------------------------------------------===//
 
+// The COPY works around S_BCNT1_I32_B64 having multiple results: its
+// value plus the implicit def of SCC.
 def : Pat <
   (i64 (ctpop i64:$src)),
     (i64 (REG_SEQUENCE SReg_64,
-     (S_BCNT1_I32_B64 $src), sub0,
+     (i32 (COPY (S_BCNT1_I32_B64 $src))), sub0,
      (S_MOV_B32 0), sub1))
 >;
 
@@ -3149,10 +3151,14 @@
 def : ZExt_i64_i1_Pat<zext>;
 def : ZExt_i64_i1_Pat<anyext>;
 
+// The COPY is a workaround for the multiple results of S_ASHR_I32.
+// The REG_SEQUENCE expansion adds both the normal result and the
+// implicit SCC def result of that node to the REG_SEQUENCE operands,
+// which breaks.
 def : Pat <
   (i64 (sext i32:$src)),
     (REG_SEQUENCE SReg_64, $src, sub0,
-    (S_ASHR_I32 $src, 31), sub1)
+    (i32 (COPY (S_ASHR_I32 $src, 31))), sub1)
 >;
 
 def : Pat <
Index: lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.td
+++ lib/Target/AMDGPU/SIRegisterInfo.td
@@ -183,7 +183,7 @@
 }
 
 // Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
+def SCCReg : RegisterClass<"AMDGPU", [i1], 32, (add SCC)> {
   let isAllocatable = 0;
   let CopyCost = -1; // Theoretically it is possible to read from SCC,
                      // but it should never be necessary.