diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -277,6 +277,7 @@ void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT); void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); + void EliminateSpillageCopies(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def); void forwardUses(MachineInstr &MI); void propagateDefs(MachineInstr &MI); @@ -920,6 +921,165 @@ Tracker.clear(); } +void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: Eliminating spillage copies in " << MBB.getName() + << "\n"); + DenseMap<MCRegister, MCRegister> LeadRegs; + DenseMap<MCRegister, SmallVector<MachineInstr *>> SpillChains; + auto IsFoldableCopy = [](const MachineInstr *Copy) { + assert(Copy->isCopy() && "MI is expected to be COPY"); + return Copy->getOperand(0).isRenamable() && + Copy->getOperand(1).isRenamable(); + }; + auto TryFoldSpillageCopies = [&, this](MCRegister LeadReg) { + assert(SpillChains.count(LeadReg) && + "Corresponding chain should be tracked"); + SmallVector<MachineInstr *> Chain = SpillChains[LeadReg]; + // We don't need this chain anymore. + SpillChains.erase(LeadReg); + // Clear LeadReg. + for (MachineInstr *Copy : Chain) { + LeadRegs.erase(Copy->getOperand(0).getReg().asMCReg()); + LeadRegs.erase(Copy->getOperand(1).getReg().asMCReg()); + } + const size_t Len = Chain.size(); + assert(Len % 2 == 0 && "Must be paired"); + + // Bail out if fewer than 3 pairs. + if (Len < 6) + return; + // Confirm we have a valid chain. 
+ for (size_t I = 2; I != Len; I += 2) { + MachineInstr *SpillMI = Chain[I]; + MachineInstr *ReloadMI = Chain[I + 1]; + MachineInstr *PrevSpillMI = Chain[I - 2]; + MachineInstr *PrevReloadMI = Chain[I - 1]; + // dbgs() << "Checking:\n"; + // SpillMI->dump(); + // PrevSpillMI->dump(); + // PrevReloadMI->dump(); + // ReloadMI->dump(); + assert(PrevSpillMI->getOperand(0).getReg().asMCReg() == + SpillMI->getOperand(1).getReg().asMCReg()); + assert(PrevReloadMI->getOperand(1).getReg().asMCReg() == + ReloadMI->getOperand(0).getReg().asMCReg()); + } + // If one in the chain is not foldable, give up. + for (size_t I = 0; I != Len; I += 2) { + MachineInstr *SpillMI = Chain[I]; + MachineInstr *ReloadMI = Chain[I + 1]; + if (!IsFoldableCopy(SpillMI) || !IsFoldableCopy(ReloadMI)) + return; + } + + // TODO: If the last reload has kill flag on src, we can fold into the last + // spill/reload pair. + Chain[0]->getOperand(0).setReg(Chain[Len - 4]->getOperand(0).getReg()); + Chain[1]->getOperand(1).setReg(Chain[Len - 3]->getOperand(1).getReg()); + for (size_t I = 2; I != Len - 2; ++I) + MaybeDeadCopies.insert(Chain[I]); + }; + + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + // IntEqClasses not feasible here. 
+ if (MI.isCopy() && MI.getNumOperands() == 2 && + !TRI->regsOverlap(MI.getOperand(0).getReg(), + MI.getOperand(1).getReg())) { + MCRegister DefReg = MI.getOperand(0).getReg().asMCReg(); + MCRegister SrcReg = MI.getOperand(1).getReg().asMCReg(); + MachineInstr *MaybeSpill = Tracker.findAvailCopy(MI, SrcReg, *TRI); + auto IsSpillReload = [](const MachineInstr &MI0, + const MachineInstr &MI1) { + return MI0.getOperand(0).getReg().asMCReg() == + MI1.getOperand(1).getReg().asMCReg() && + MI0.getOperand(1).getReg().asMCReg() == + MI1.getOperand(0).getReg().asMCReg(); + }; + if (MaybeSpill) { + // dbgs() << "MaybeSpill:\n"; + // MaybeSpill->dump(); + if (IsSpillReload(*MaybeSpill, MI)) { + // dbgs() << "Found spill/reload pair:\n"; + // MaybeSpill->dump(); + // MI.dump(); + // We have found a spill/reload pair. Now look for which chain this + // pair belongs to. + auto LeadRegI = LeadRegs.find(DefReg); + if (LeadRegI == LeadRegs.end()) { + assert(!SpillChains.count(DefReg) && + "Chain for DefReg should not exist without a LeadReg"); + SpillChains.insert({DefReg, {MaybeSpill, &MI}}); + LeadRegs[DefReg] = DefReg; + LeadRegs[SrcReg] = DefReg; + } else { + MCRegister LeadReg = LeadRegI->second; + assert(SpillChains.count(LeadReg) && + "Chain with a LeadReg should exist"); + SmallVector<MachineInstr *> &Chain = SpillChains[LeadRegI->second]; + Chain.push_back(MaybeSpill); + Chain.push_back(&MI); + LeadRegs[DefReg] = LeadReg; + LeadRegs[SrcReg] = LeadReg; + } + } else if (LeadRegs.count(DefReg)) { + // Clobbered by a non-paired copy and we have an available chain. Now + // perform the transformation before invalidating it. + MCRegister LeadReg = LeadRegs[DefReg]; + TryFoldSpillageCopies(LeadReg); + } + } + // dbgs() << "Tracking:\n"; + // MI.dump(); + Tracker.trackCopy(&MI, *TRI); + continue; + } + // For non-copy instruction MI. 
+ for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + MCRegister Reg = MO.getReg().asMCReg(); + if (!Reg) + continue; + MachineInstr *MaybeCopy = Tracker.findAvailCopy(MI, Reg, *TRI); + if (!MaybeCopy) + continue; + MCRegister DefReg = MaybeCopy->getOperand(0).getReg().asMCReg(); + assert(TRI->regsOverlap(Reg, DefReg) && + "Tracker should have tracked the COPY writing to Reg"); + MCRegister SrcReg = MaybeCopy->getOperand(1).getReg().asMCReg(); + if (LeadRegs.count(SrcReg)) { + MCRegister LeadReg = LeadRegs[SrcReg]; + if (SpillChains.count(LeadReg)) { + // We have an available chain. Now perform the transformation. + // We don't need this chain anymore. + // dbgs() << "SpillChain size for " << printReg(LeadReg, TRI) << ": " + // << SpillChains[LeadReg].size() << "\n"; + TryFoldSpillageCopies(LeadReg); + } + } + // dbgs() << "Clobbering " << printReg(Reg, TRI) << "\n"; + Tracker.clobberRegister(Reg, *TRI); + } + } + // Handle remaining chains (snapshot the keys: the callee erases entries). + for (MCRegister LeadReg : to_vector(make_first_range(SpillChains))) + TryFoldSpillageCopies(LeadReg); + + for (MachineInstr *Copy : MaybeDeadCopies) { + Register Src = Copy->getOperand(1).getReg(); + Register Def = Copy->getOperand(0).getReg(); + SmallVector<MachineInstr *> MaybeDeadDbgUsers(CopyDbgUsers[Copy].begin(), + CopyDbgUsers[Copy].end()); + MRI->updateDbgUsersToReg(Src.asMCReg(), Def.asMCReg(), MaybeDeadDbgUsers); + Copy->eraseFromParent(); + ++NumDeletes; + } + + MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); + Tracker.clear(); +} + bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -931,6 +1091,7 @@ MRI = &MF.getRegInfo(); for (MachineBasicBlock &MBB : MF) { + EliminateSpillageCopies(MBB); BackwardCopyPropagateBlock(MBB); ForwardCopyPropagateBlock(MBB); } diff --git a/llvm/test/CodeGen/PowerPC/mcp-elim-eviction-chain.mir b/llvm/test/CodeGen/PowerPC/mcp-elim-eviction-chain.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/mcp-elim-eviction-chain.mir 
@@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ +# RUN: -simplify-mir -run-pass=machine-cp %s -o - | FileCheck %s + +--- +name: test0 +alignment: 4 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x4, $x5, $x20, $x21, $x22 + ; CHECK-LABEL: name: test0 + ; CHECK: liveins: $x4, $x5, $x20, $x21, $x22 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x24 = COPY $x4 + ; CHECK-NEXT: $x23 = COPY renamable $x20 + ; CHECK-NEXT: renamable $x20 = ADD8 $x4, $x5 + ; CHECK-NEXT: renamable $x4 = COPY renamable $x20 + ; CHECK-NEXT: renamable $x20 = COPY $x23 + ; CHECK-NEXT: renamable $x23 = COPY renamable $x24 + ; CHECK-NEXT: $x3 = COPY $x4 + ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x20, implicit $x21, implicit $x22, implicit $x23 + renamable $x23 = COPY $x4 + renamable $x24 = COPY renamable $x23 + renamable $x23 = COPY renamable $x22 + renamable $x22 = COPY renamable $x21 + renamable $x21 = COPY renamable $x20 + renamable $x20 = ADD8 $x4, $x5 + renamable $x4 = COPY renamable $x20 + renamable $x20 = COPY renamable $x21 + renamable $x21 = COPY renamable $x22 + renamable $x22 = COPY renamable $x23 + renamable $x23 = COPY renamable $x24 + $x3 = COPY $x4 + BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x20, implicit $x21, implicit $x22, implicit $x23 + +... 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll @@ -68,17 +68,13 @@ ; CHECK-NEXT: vshl.i16 q2, q0, #3 ; CHECK-NEXT: vand q3, q1, q5 ; CHECK-NEXT: vmov q1, q7 -; CHECK-NEXT: vand q2, q2, q6 -; CHECK-NEXT: vmov q7, q6 -; CHECK-NEXT: vmov q6, q5 -; CHECK-NEXT: vmov q5, q4 +; CHECK-NEXT: vmov q7, q4 ; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload +; CHECK-NEXT: vand q2, q2, q6 ; CHECK-NEXT: vshr.u16 q0, q0, #9 ; CHECK-NEXT: vmla.u16 q4, q2, r2 ; CHECK-NEXT: vshr.u16 q2, q4, #11 -; CHECK-NEXT: vmov q4, q5 -; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: vmov q6, q7 +; CHECK-NEXT: vmov q4, q7 ; CHECK-NEXT: vmov q7, q1 ; CHECK-NEXT: vorr q1, q3, q2 ; CHECK-NEXT: vldrw.u32 q2, [sp, #16] @ 16-byte Reload diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll --- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll @@ -1123,13 +1123,10 @@ ; CHECK-NEXT: vldrwt.u32 q0, [r11] ; CHECK-NEXT: vstrw.32 q6, [sp, #40] @ 16-byte Spill ; CHECK-NEXT: vmov q6, q5 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q1, q0, q7 -; CHECK-NEXT: vmov q5, q4 -; CHECK-NEXT: vmov q4, q3 -; CHECK-NEXT: vmov q3, q1 -; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r6] +; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vldrw.u32 q1, [sp, #56] @ 16-byte Reload ; CHECK-NEXT: adds r7, r6, r5 ; CHECK-NEXT: vpstt @@ -1137,13 +1134,11 @@ ; CHECK-NEXT: vldrwt.u32 q0, [r7] ; CHECK-NEXT: adds r6, r7, r5 ; CHECK-NEXT: vstrw.32 q1, [sp, #56] @ 16-byte Spill -; CHECK-NEXT: vmov q1, q3 -; CHECK-NEXT: vmov q3, q4 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q3, q0, q7 ; CHECK-NEXT: vldrwt.u32 q0, [r6] -; CHECK-NEXT: vmov q4, q5 ; CHECK-NEXT: adds r7, r6, r5 +; CHECK-NEXT: vmov q1, q5 ; CHECK-NEXT: vpstt ; 
CHECK-NEXT: vfmat.f32 q4, q0, q7 ; CHECK-NEXT: vldrwt.u32 q0, [r7] @@ -1412,9 +1407,7 @@ ; CHECK-NEXT: vmov q6, q5 ; CHECK-NEXT: vpst ; CHECK-NEXT: vfmat.f32 q7, q1, q0 -; CHECK-NEXT: vmov q5, q3 -; CHECK-NEXT: vmov q3, q4 -; CHECK-NEXT: vmov q4, q2 +; CHECK-NEXT: vmov q5, q2 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r6] ; CHECK-NEXT: vldrw.u32 q2, [sp, #56] @ 16-byte Reload @@ -1430,23 +1423,20 @@ ; CHECK-NEXT: vldrwt.u32 q1, [r6] ; CHECK-NEXT: adds r7, r6, r5 ; CHECK-NEXT: vstrw.32 q2, [sp, #72] @ 16-byte Spill -; CHECK-NEXT: vmov q2, q4 -; CHECK-NEXT: vmov q4, q3 +; CHECK-NEXT: vmov q2, q5 +; CHECK-NEXT: adds r6, r7, r5 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q2, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r7] -; CHECK-NEXT: adds r6, r7, r5 -; CHECK-NEXT: vmov q3, q5 +; CHECK-NEXT: vmov q5, q6 +; CHECK-NEXT: vldrw.u32 q6, [sp, #40] @ 16-byte Reload ; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q4, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r6] -; CHECK-NEXT: vmov q5, q6 ; CHECK-NEXT: add r6, r5 -; CHECK-NEXT: vpstt +; CHECK-NEXT: vpsttt ; CHECK-NEXT: vfmat.f32 q5, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r6] -; CHECK-NEXT: vldrw.u32 q6, [sp, #40] @ 16-byte Reload -; CHECK-NEXT: vpst ; CHECK-NEXT: vfmat.f32 q3, q1, q0 ; CHECK-NEXT: le lr, .LBB7_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block