diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -106,7 +106,7 @@ if (TLI->isTypeLegal(VT)) UseRC = TLI->getRegClassFor(VT, Node->isDivergent()); - if (!IsClone && !IsCloned) + if (true) for (SDNode *User : Node->uses()) { bool Match = true; if (User->getOpcode() == ISD::CopyToReg && diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -929,6 +929,9 @@ const TargetRegisterClass * SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { + if (RC == &AMDGPU::SCC_CLASSRegClass) + return &AMDGPU::SReg_32RegClass; + if (isAGPRClass(RC) && !ST.hasGFX90AInsts()) return getEquivalentVGPRClass(RC); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -510,13 +510,6 @@ (select SCC, $src1, $src2), [{ if (!Subtarget->hasScalarCompareEq64() || N->isDivergent()) return false; - SDValue SetCC = N->getOperand(0); - for (auto I = SetCC->use_begin(), E = SetCC->use_end(); I != E; ++I) { - if (I->getOpcode() == ISD::CopyToReg) - continue; - if (I->getOpcode() != ISD::SELECT || I.getOperandNo() != 0) - return false; - } return true; }] >; diff --git a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll --- a/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll +++ b/llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll @@ -30,11 +30,12 @@ ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_add_co_u32_e64 v0, s[4:5], s6, s6 ; GFX9-NEXT: s_cmp_lg_u64 s[4:5], 0 -; GFX9-NEXT: s_addc_u32 s4, s6, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 +; GFX9-NEXT: s_addc_u32 s7, s6, 0 +; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GFX9-NEXT: s_and_b64 s[4:5], s[4:5], exec +; GFX9-NEXT: s_cselect_b32 s4, s7, 0 ; GFX9-NEXT: s_cmp_gt_u32 s6, 31 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, s4 ; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -50,10 +51,11 @@ ; GFX10-NEXT: s_cmpk_lg_u32 s5, 0x0 ; GFX10-NEXT: s_addc_u32 s5, s4, 0 ; GFX10-NEXT: s_cselect_b32 s6, -1, 0 +; GFX10-NEXT: s_and_b32 s6, s6, exec_lo +; GFX10-NEXT: s_cselect_b32 s5, s5, 0 ; GFX10-NEXT: s_cmp_gt_u32 s4, 31 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, s5, s6 ; GFX10-NEXT: s_cselect_b32 vcc_lo, -1, 0 -; GFX10-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo ; GFX10-NEXT: s_setpc_b64 s[30:31] bb: %i = load volatile i32, i32 addrspace(4)* null, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -60,16 +60,19 @@ ; GCN-LABEL: {{^}}double5_extelt: ; GCN-NOT: buffer_ -; GCN: s_cmp_eq_u32 [[IDX:s[0-9]+]], 3 -; GCN: s_cselect_b64 vcc, -1, 0 -; GCN: s_cmp_eq_u32 [[IDX]], 4 -; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], -1, 0 -; GCN: s_cmp_eq_u32 [[IDX]], 1 +; GCN: s_cmp_eq_u32 [[IDX:s[0-9]+]], 1 ; GCN: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, {{[^,]+}} ; GCN: s_cselect_b32 s{{[0-9]+}}, s{{[0-9]+}}, {{[^,]+}} ; GCN: s_cmp_eq_u32 [[IDX]], 2 ; GCN: s_cselect_b32 s{{[0-9]+}}, {{[^,]+}}, s{{[0-9]+}} ; GCN: s_cselect_b32 s{{[0-9]+}}, {{[^,]+}}, s{{[0-9]+}} +; GCN: s_cmp_eq_u32 [[IDX]], 3 +; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], -1, 0 +; GCN: s_cselect_b32 s{{[0-9]+}}, {{[^,]+}}, s{{[0-9]+}} +; GCN: s_cmp_eq_u32 [[IDX]], 4 +; GCN: s_cselect_b64 s[{{[0-9]+:[0-9]+}}], -1, 0 +; GCN: s_cselect_b32 s{{[0-9]+}}, {{[^,]+}}, s{{[0-9]+}} +; GCN: s_cselect_b32 s{{[0-9]+}}, {{[^,]+}}, s{{[0-9]+}} ; GCN: store_dwordx2 v[{{[0-9:]+}}] define amdgpu_kernel void @double5_extelt(double addrspace(1)* %out, i32 %sel) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll --- a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll @@ -18,7 +18,9 @@ ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; CHECK-NEXT: s_cmpk_lg_u32 vcc_lo, 0x0 ; CHECK-NEXT: s_subb_u32 s4, 1, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, s4, vcc_lo +; CHECK-NEXT: s_and_b32 s5, vcc_lo, exec_lo +; CHECK-NEXT: s_cselect_b32 s4, s4, 0 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: %i = load i32, i32 addrspace(3)* null, align 16