diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1294,6 +1294,41 @@ return true; } +/// Return true if a target-defined block prologue instruction in [BB->begin(), End) interferes with sinking MI: it modifies a register that MI uses, or reads a register operand of MI that is not a use. +/// Physical-register uses that TII reports as ignorable, or that MRI (when non-null) reports as constant, are not treated as interference. +static bool blockPrologueInterferes(MachineBasicBlock *BB, + MachineBasicBlock::iterator End, + MachineInstr &MI, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + const MachineRegisterInfo *MRI) { + if (BB->begin() == End) + return false; // empty range, so there are no prologue instructions to check + for (MachineBasicBlock::iterator PI = BB->begin(); PI != End; ++PI) { + // Skip anything the target does not classify as a block prologue instruction + if (!TII->isBasicBlockPrologue(*PI)) + continue; + for (auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + if (Register::isPhysicalRegister(Reg) && + (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg)))) + continue; + if (PI->modifiesRegister(Reg, TRI)) + return true; + } else { + if (PI->readsRegister(Reg, TRI)) + return true; + } + } + } + return false; +} + /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, @@ -1407,6 +1442,10 @@ // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin()); + if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) { + LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n"); + return false; + } // Collect debug users of any vreg that this inst defines. 
SmallVector DbgUsersToSink; @@ -1805,11 +1844,19 @@ } auto DbgValsToSink = DbgValsToSinkMap.takeVector(); + LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB); + + MachineBasicBlock::iterator InsertPos = + SuccBB->SkipPHIsAndLabels(SuccBB->begin()); + if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) { + LLVM_DEBUG( + dbgs() << " *** Not sinking: prologue interference\n"); + continue; + } + // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); - MachineBasicBlock::iterator InsertPos = - SuccBB->SkipPHIsAndLabels(SuccBB->begin()); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir --- a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir +++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow-postra.mir @@ -17,16 +17,16 @@ ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $sgpr4 ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4 ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF ; GFX10-NEXT: renamable $sgpr0_sgpr1 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, $vgpr5, 0, implicit $mode, implicit $exec ; GFX10-NEXT: S_BRANCH %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; GFX10-NEXT: liveins: $sgpr4:0x0000000000000003, $sgpr6, $sgpr0_sgpr1 + ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4 ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; 
GFX10-NEXT: S_BRANCH %bb.2 @@ -70,3 +70,69 @@ S_ENDPGM 0 ... +--- +name: _amdgpu_ps_main2 +alignment: 1 +tracksRegLiveness: true +registers: [] +liveins: + - { reg: '$sgpr4', virtual-reg: '' } +body: | + ; GFX10-LABEL: name: _amdgpu_ps_main2 + ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.1(0x80000000) + ; GFX10-NEXT: liveins: $sgpr4_sgpr5, $sgpr6_sgpr7 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: renamable $sgpr9 = COPY $sgpr4 + ; GFX10-NEXT: renamable $vgpr5 = IMPLICIT_DEF + ; GFX10-NEXT: renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7 + ; GFX10-NEXT: S_BRANCH %bb.1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-NEXT: liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.3(0x80000000) + ; GFX10-NEXT: liveins: $sgpr6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $m0 = COPY killed renamable $sgpr6 + ; GFX10-NEXT: S_BRANCH %bb.3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $sgpr4_sgpr5, $sgpr6_sgpr7 + + renamable $sgpr9 = COPY $sgpr4 + renamable $vgpr5 = IMPLICIT_DEF + renamable $sgpr0_sgpr1 = COPY $sgpr6_sgpr7 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2(0x40000000), %bb.8(0x40000000) + liveins: $sgpr6, $sgpr9, $sgpr0_sgpr1 + + $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec + renamable $sgpr14_sgpr15 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc + S_CBRANCH_EXECZ %bb.8, implicit $exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.8(0x40000000) + liveins: 
$sgpr6 + + $m0 = COPY killed renamable $sgpr6 + S_BRANCH %bb.8 + + bb.8: + + S_ENDPGM 0 + +...