Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -2153,10 +2153,12 @@ // SOP1 Patterns //===----------------------------------------------------------------------===// +// The COPY works around S_BCNT1_I32_B64 having multiple results due to +// its implicit SCC def. def : Pat < (i64 (ctpop i64:$src)), (i64 (REG_SEQUENCE SReg_64, - (S_BCNT1_I32_B64 $src), sub0, + (i32 (COPY (S_BCNT1_I32_B64 $src))), sub0, (S_MOV_B32 0), sub1)) >; @@ -3149,10 +3151,14 @@ def : ZExt_i64_i1_Pat; def : ZExt_i64_i1_Pat; +// The COPY is a workaround for the multiple results of S_ASHR_I32. +// Without it, the REG_SEQUENCE expansion adds both the normal result +// and the node's implicit SCC def result as operands of the +// REG_SEQUENCE, which breaks. def : Pat < (i64 (sext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, - (S_ASHR_I32 $src, 31), sub1) + (i32 (COPY (S_ASHR_I32 $src, 31))), sub1) >; def : Pat < Index: lib/Target/AMDGPU/SIRegisterInfo.td =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.td +++ lib/Target/AMDGPU/SIRegisterInfo.td @@ -183,7 +183,7 @@ } // Special register classes for predicates and the M0 register -def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> { +def SCCReg : RegisterClass<"AMDGPU", [i1], 32, (add SCC)> { let isAllocatable = 0; let CopyCost = -1; // Theoretically it is possible to read from SCC, // but it should never be necessary.