diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1805,11 +1805,30 @@ } auto DbgValsToSink = DbgValsToSinkMap.takeVector(); + LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); + + MachineBasicBlock::iterator InsertPos = + SuccBB->SkipPHIsAndLabels(SuccBB->begin()); + if (InsertPos != SuccBB->begin()) { + bool UsesClobbered = false; + for (MachineBasicBlock::iterator PI = SuccBB->begin(); + PI != InsertPos && !UsesClobbered; ++PI) { + for (auto U : UsedOpsInCopy) { + Register SrcReg = MI.getOperand(U).getReg(); + if (PI->modifiesRegister(SrcReg, TRI)) { + LLVM_DEBUG(dbgs() << " *** Not sinking: prologue clobbers uses.\n"); + UsesClobbered = true; + break; + } + } + } + if (UsesClobbered) + continue; + } + // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); - MachineBasicBlock::iterator InsertPos = - SuccBB->SkipPHIsAndLabels(SuccBB->begin()); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir --- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir +++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir @@ -17,16 +17,16 @@ ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $sgpr4 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4 ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF ; GFX10-NEXT: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_BRANCH %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GFX10-NEXT: liveins: $sgpr4:0x0000000000000003, $sgpr6, $sgpr0_sgpr1 + ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4 ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GFX10-NEXT: S_BRANCH %bb.2