diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -440,12 +440,17 @@
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
 static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
-                               std::vector<SUnit*> &LiveRegDefs,
+                               std::vector<SUnit *> &LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVectorImpl<unsigned> &LRegs,
-                               const TargetRegisterInfo *TRI) {
+                               const TargetRegisterInfo *TRI,
+                               const SDNode *Node = nullptr) {
   bool Added = false;
   for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
+    // Allow for multiple uses of same def
+    if (Node && LiveRegDefs[*AI] && LiveRegDefs[*AI]->getNode() == Node)
+      continue;
+
     if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) {
       if (RegAdded.insert(*AI).second) {
         LRegs.push_back(*AI);
@@ -502,6 +507,15 @@
       }
       continue;
     }
+
+    if (Node->getOpcode() == ISD::CopyToReg) {
+      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (Reg.isPhysical()) {
+        SDNode *SrcNode = Node->getOperand(2).getNode();
+        CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI, SrcNode);
+      }
+    }
+
     if (!Node->isMachineOpcode())
       continue;
     const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1294,11 +1294,11 @@
 
 /// CheckForLiveRegDef - Return true and update live register vector if the
 /// specified register def of the specified SUnit clobbers any "live" registers.
-static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
-                               SUnit **LiveRegDefs,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
                                SmallSet<unsigned, 4> &RegAdded,
                                SmallVectorImpl<unsigned> &LRegs,
-                               const TargetRegisterInfo *TRI) {
+                               const TargetRegisterInfo *TRI,
+                               const SDNode *Node = nullptr) {
   for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
 
     // Check if Ref is live.
@@ -1307,6 +1307,10 @@
     // Allow multiple uses of the same def.
     if (LiveRegDefs[*AliasI] == SU) continue;
 
+    // Allow multiple uses of same def
+    if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
+      continue;
+
     // Add Reg to the set of interfering live regs.
     if (RegAdded.insert(*AliasI).second) {
       LRegs.push_back(*AliasI);
@@ -1387,6 +1391,15 @@
       continue;
     }
 
+    if (Node->getOpcode() == ISD::CopyToReg) {
+      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+      if (Reg.isPhysical()) {
+        SDNode *SrcNode = Node->getOperand(2).getNode();
+        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
+                           SrcNode);
+      }
+    }
+
     if (!Node->isMachineOpcode())
       continue;
     // If we're in the middle of scheduling a call, don't begin scheduling
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 < %s | FileCheck -check-prefix=RRLIST %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -O3 -pre-RA-sched=fast < %s | FileCheck -check-prefix=FAST %s
+
+
+define protected amdgpu_kernel void @sccClobber(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %pout.coerce) {
+; RRLIST-LABEL: sccClobber:
+; RRLIST:       ; %bb.0: ; %entry
+; RRLIST-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; RRLIST-NEXT:    v_mov_b32_e32 v2, 0
+; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
+; RRLIST-NEXT:    s_load_dword s16, s[8:9], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x0
+; RRLIST-NEXT:    s_load_dwordx2 s[14:15], s[0:1], 0x44
+; RRLIST-NEXT:    s_load_dword s17, s[10:11], 0x0
+; RRLIST-NEXT:    s_waitcnt lgkmcnt(0)
+; RRLIST-NEXT:    s_min_i32 s4, s16, 0
+; RRLIST-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; RRLIST-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; RRLIST-NEXT:    s_and_b64 s[0:1], vcc, exec
+; RRLIST-NEXT:    s_cselect_b32 s0, s16, s17
+; RRLIST-NEXT:    s_cmp_eq_u64 s[12:13], s[2:3]
+; RRLIST-NEXT:    s_cselect_b32 s0, s4, s0
+; RRLIST-NEXT:    v_mov_b32_e32 v0, s0
+; RRLIST-NEXT:    global_store_dword v2, v0, s[14:15]
+; RRLIST-NEXT:    s_endpgm
+;
+; FAST-LABEL: sccClobber:
+; FAST:       ; %bb.0: ; %entry
+; FAST-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; FAST-NEXT:    v_mov_b32_e32 v2, 0
+; FAST-NEXT:    s_waitcnt lgkmcnt(0)
+; FAST-NEXT:    s_load_dword s16, s[8:9], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[12:13], s[4:5], 0x0
+; FAST-NEXT:    s_load_dwordx2 s[14:15], s[0:1], 0x44
+; FAST-NEXT:    s_load_dword s17, s[10:11], 0x0
+; FAST-NEXT:    s_waitcnt lgkmcnt(0)
+; FAST-NEXT:    s_min_i32 s4, s16, 0
+; FAST-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
+; FAST-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; FAST-NEXT:    s_and_b64 s[0:1], vcc, exec
+; FAST-NEXT:    s_cselect_b32 s0, s16, s17
+; FAST-NEXT:    s_cmp_eq_u64 s[12:13], s[2:3]
+; FAST-NEXT:    s_cselect_b32 s0, s4, s0
+; FAST-NEXT:    v_mov_b32_e32 v0, s0
+; FAST-NEXT:    global_store_dword v2, v0, s[14:15]
+; FAST-NEXT:    s_endpgm
+entry:
+  %0 = load i64, ptr addrspace(1) %a, align 8
+  %1 = load i64, ptr addrspace(1) %b, align 8
+  %2 = load i32, ptr addrspace(1) %e, align 4
+  %3 = load i32, ptr addrspace(1) %f, align 4
+  %cmp7.1 = icmp eq i64 %0, %1
+  %call.1 = tail call noundef i32 @llvm.smin.i32(i32 noundef 0, i32 noundef %2)
+  %cmp8.1 = icmp slt i64 %0, %1
+  %cond.1 = select i1 %cmp8.1, i32 %2, i32 %3
+  %cond14.1 = select i1 %cmp7.1, i32 %call.1, i32 %cond.1
+  store i32 %cond14.1, ptr addrspace(1) %pout.coerce, align 4
+  ret void
+}
+
+declare i32 @llvm.smin.i32(i32, i32)