diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1805,11 +1805,30 @@ } auto DbgValsToSink = DbgValsToSinkMap.takeVector(); + LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); + + MachineBasicBlock::iterator InsertPos = + SuccBB->SkipPHIsAndLabels(SuccBB->begin()); + if (InsertPos != SuccBB->begin()) { + bool UsesClobbered = false; + for (MachineBasicBlock::iterator PI = SuccBB->begin(); + PI != InsertPos && !UsesClobbered; ++PI) { + for (auto U : UsedOpsInCopy) { + Register SrcReg = MI.getOperand(U).getReg(); + if (PI->modifiesRegister(SrcReg, TRI)) { + LLVM_DEBUG(dbgs() << " *** Not sinking: prologue clobbers uses.\n"); + UsesClobbered = true; + break; + } + } + } + if (UsesClobbered) + continue; + } + // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); - MachineBasicBlock::iterator InsertPos = - SuccBB->SkipPHIsAndLabels(SuccBB->begin()); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir @@ -0,0 +1,72 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=postra-machine-sink -mattr=-wavefrontsize32,+wavefrontsize64 -o - %s | FileCheck -check-prefixes=GFX10 %s + +# Ensure that PostRA Machine Sink does not sink instructions +# past block prologues which would overwrite their uses. + +--- +name: _amdgpu_ps_main +alignment: 1 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$sgpr4', virtual-reg: '' } +body: | + ; GFX10-LABEL: name: _amdgpu_ps_main + ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.1(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4 + ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF + ; GFX10-NEXT: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: liveins: $sgpr6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6 + ; GFX10-NEXT: S_BRANCH %bb.3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $sgpr4 + + renamable $sgpr9 = COPY $sgpr4 + renamable $vgpr5 = IMPLICIT_DEF + renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2(0x40000000), %bb.8(0x40000000) + liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1 + + $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec + renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + S_CBRANCH_EXECZ %bb.8, implicit $exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.8(0x40000000) + liveins: $sgpr6 + + $m0 = COPY killed renamable $sgpr6 + S_BRANCH %bb.8 + + bb.8: + + S_ENDPGM 0 + +...