diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -80,6 +80,8 @@
 STATISTIC(NumDeletes, "Number of dead copies deleted");
 STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
 STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
+STATISTIC(NumFunctionsSeen, "Number of functions seen by MCP");
+STATISTIC(SpillageChainsLength, "Length of spillage chains");
 
 DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
               "Controls which register COPYs are forwarded");
@@ -103,7 +105,7 @@ class CopyTracker {
   struct CopyInfo {
-    MachineInstr *MI;
+    MachineInstr *MI, *LastSeenUseInCopy;
     SmallVector<MCRegister, 4> DefRegs;
     bool Avail;
   };
@@ -189,15 +191,16 @@
     // Remember Def is defined by the copy.
     for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
-      Copies[*RUI] = {MI, {}, true};
+      Copies[*RUI] = {MI, nullptr, {}, true};
 
     // Remember source that's copied to Def. Once it's clobbered, then
     // it's no longer available for copy propagation.
     for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) {
-      auto I = Copies.insert({*RUI, {nullptr, {}, false}});
+      auto I = Copies.insert({*RUI, {nullptr, nullptr, {}, false}});
       auto &Copy = I.first->second;
       if (!is_contained(Copy.DefRegs, Def))
         Copy.DefRegs.push_back(Def);
+      Copy.LastSeenUseInCopy = MI;
     }
   }
@@ -286,6 +289,48 @@
     return AvailCopy;
   }
 
+  // Find the last COPY that defines Reg before Current MachineInstr.
+  MachineInstr *findLastSeenDefInCopy(const MachineInstr &Current,
+                                      MCRegister Reg,
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII,
+                                      bool UseCopyInstr) {
+    MCRegUnitIterator RUI(Reg, &TRI);
+    auto CI = Copies.find(*RUI);
+    if (CI == Copies.end() || !CI->second.Avail)
+      return nullptr;
+
+    MachineInstr *DefCopy = CI->second.MI;
+    std::optional<DestSourcePair> CopyOperands =
+        isCopyInstr(*DefCopy, TII, UseCopyInstr);
+    Register Def = CopyOperands->Destination->getReg();
+    if (!TRI.isSubRegisterEq(Def, Reg))
+      return nullptr;
+
+    for (const MachineInstr &MI :
+         make_range(static_cast<const MachineInstr *>(DefCopy)->getIterator(),
+                    Current.getIterator()))
+      for (const MachineOperand &MO : MI.operands())
+        if (MO.isRegMask())
+          if (MO.clobbersPhysReg(Def)) {
+            LLVM_DEBUG(dbgs() << "MCP: Removed tracking of "
+                              << printReg(Def, &TRI) << "\n");
+            return nullptr;
+          }
+
+    return DefCopy;
+  }
+
+  // Find the last COPY that uses Reg.
+  MachineInstr *findLastSeenUseInCopy(MCRegister Reg,
+                                      const TargetRegisterInfo &TRI) {
+    MCRegUnitIterator RUI(Reg, &TRI);
+    auto CI = Copies.find(*RUI);
+    if (CI == Copies.end())
+      return nullptr;
+    return CI->second.LastSeenUseInCopy;
+  }
+
   void clear() {
     Copies.clear();
   }
@@ -325,6 +370,7 @@
   void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
   void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
   void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
+  void EliminateSpillageCopies(MachineBasicBlock &MBB);
   bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
   void forwardUses(MachineInstr &MI);
   void propagateDefs(MachineInstr &MI);
@@ -1008,10 +1054,332 @@
   Tracker.clear();
 }
 
+static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+    DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
+    DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
+    MachineInstr *Leader) {
+  auto &SC = SpillChain[Leader];
+  auto &RC = ReloadChain[Leader];
+  for (auto I = SC.rbegin(), E = SC.rend(); I != E; ++I)
+    (*I)->dump();
+  for (MachineInstr *MI : RC)
+    MI->dump();
+}
+
+// Remove spill-reload-like copy chains. For example
+//   r0 = COPY r1
+//   r1 = COPY r2
+//   r2 = COPY r3
+//   r3 = COPY r4
+//   <def-use r4>
+//   r4 = COPY r3
+//   r3 = COPY r2
+//   r2 = COPY r1
+//   r1 = COPY r0
+// will be folded into
+//   r0 = COPY r1
+//   r1 = COPY r4
+//   <def-use r4>
+//   r4 = COPY r1
+//   r1 = COPY r0
+// TODO: Currently we don't track usage of r0 outside the chain, so we
+// conservatively keep its value as it was before the rewrite.
+//
+// The algorithm tries to maintain two properties:
+// property#1: No Def of a spill COPY in the chain is used or defined until
+// the paired reload COPY in the chain uses the Def.
+//
+// property#2: No Source of a COPY in the chain is used or defined until the
+// next COPY in the chain defines the Source, except for the innermost
+// spill-reload pair.
+//
+// The algorithm is conducted by checking every COPY inside the MBB: assume
+// the COPY is a reload COPY, then try to find the paired spill COPY by
+// searching backward for the COPY that defines the Src of the reload COPY.
+// If such a pair is found, it either joins an existing chain or starts a
+// new chain, depending on whether the last COPY seen using the Def of the
+// reload COPY already belongs to a chain.
+// Implementation note: we use CopyTracker::findLastSeenDefInCopy(MI, Reg, ...)
+// to find the last COPY that defines Reg, and
+// CopyTracker::findLastSeenUseInCopy(Reg, ...) to find the last COPY that
+// uses Reg. When we encounter a non-COPY instruction, we check the registers
+// in its operands; if such a Reg is defined by a COPY, we untrack the Reg via
+// CopyTracker::clobberRegister(Reg, ...).
+void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
+  // ChainLeader maps each MI inside a spill-reload chain to the chain's
+  // innermost reload COPY. Thus we can track whether an MI belongs to an
+  // existing spill-reload chain.
+  DenseMap<MachineInstr *, MachineInstr *> ChainLeader;
+  // SpillChain maps the innermost reload COPY of a spill-reload chain to the
+  // sequence of COPYs that forms the spills of the chain.
+  // ReloadChain maps the innermost reload COPY of a spill-reload chain to the
+  // sequence of COPYs that forms the reloads of the chain.
+  DenseMap<MachineInstr *, SmallVector<MachineInstr *>> SpillChain, ReloadChain;
+  // If a COPY's Source has a use or def before the next COPY defines the
+  // Source, we put the COPY in this set to keep property#2.
+  DenseSet<const MachineInstr *> CopySourceInvalid;
+
+  auto TryFoldSpillageCopies =
+      [&, this](const SmallVectorImpl<MachineInstr *> &SC,
+                const SmallVectorImpl<MachineInstr *> &RC) {
+        assert(SC.size() == RC.size() && "Spill-reload should be paired");
+
+        // We need at least 3 pairs of copies for the transformation to apply,
+        // because the first outermost pair cannot be removed since we don't
+        // recolor outside of the chain, and we need at least one temporary
+        // spill slot to shorten the chain. If we only have a chain of two
+        // pairs, we already have the shortest sequence this code can handle:
+        // the outermost pair for the temporary spill slot, and the pair that
+        // uses that temporary spill slot for the other end of the chain.
+        // TODO: We might be able to simplify to one spill-reload pair by
+        // collecting more information about the outermost COPY.
+        if (SC.size() <= 2)
+          return;
+
+        // If property#2 is violated, we don't fold the chain.
+        for (size_t I = 1, N = SC.size(); I < N; ++I) {
+          if (CopySourceInvalid.count(SC[I]))
+            return;
+        }
+
+        for (size_t I = 0, N = RC.size() - 1; I < N; ++I) {
+          if (CopySourceInvalid.count(RC[I]))
+            return;
+        }
+
+        auto CheckCopyConstraint = [this](Register Def, Register Src) {
+          for (const TargetRegisterClass *RC : TRI->regclasses()) {
+            if (RC->contains(Def) && RC->contains(Src))
+              return true;
+          }
+          return false;
+        };
+
+        auto UpdateReg = [](MachineInstr *MI, const MachineOperand *Old,
+                            const MachineOperand *New) {
+          for (MachineOperand &MO : MI->operands()) {
+            if (&MO == Old)
+              MO.setReg(New->getReg());
+          }
+        };
+
+        std::optional<DestSourcePair> InnerMostSpillCopy =
+            isCopyInstr(*SC[0], *TII, UseCopyInstr);
+        std::optional<DestSourcePair> OuterMostSpillCopy =
+            isCopyInstr(*SC.back(), *TII, UseCopyInstr);
+        std::optional<DestSourcePair> InnerMostReloadCopy =
+            isCopyInstr(*RC[0], *TII, UseCopyInstr);
+        std::optional<DestSourcePair> OuterMostReloadCopy =
+            isCopyInstr(*RC.back(), *TII, UseCopyInstr);
+        if (!CheckCopyConstraint(OuterMostSpillCopy->Source->getReg(),
+                                 InnerMostSpillCopy->Source->getReg()) ||
+            !CheckCopyConstraint(InnerMostReloadCopy->Destination->getReg(),
+                                 OuterMostReloadCopy->Destination->getReg()))
+          return;
+
+        SpillageChainsLength += SC.size() + RC.size();
+        UpdateReg(SC[0], InnerMostSpillCopy->Destination,
+                  OuterMostSpillCopy->Source);
+        UpdateReg(RC[0], InnerMostReloadCopy->Source,
+                  OuterMostReloadCopy->Destination);
+
+        for (size_t I = 1; I < SC.size() - 1; ++I) {
+          SC[I]->eraseFromParent();
+          RC[I]->eraseFromParent();
+        }
+      };
+
+  auto IsFoldableCopy = [this](const MachineInstr &MaybeCopy) {
+    if (MaybeCopy.getNumImplicitOperands() > 0)
+      return false;
+    std::optional<DestSourcePair> CopyOperands =
+        isCopyInstr(MaybeCopy, *TII, UseCopyInstr);
+    if (!CopyOperands)
+      return false;
+    Register Src = CopyOperands->Source->getReg();
+    Register Def = CopyOperands->Destination->getReg();
+    return Src && Def && !TRI->regsOverlap(Src, Def) &&
+           CopyOperands->Source->isRenamable() &&
+           CopyOperands->Destination->isRenamable();
+  };
+
+  auto IsSpillReloadPair = [&, this](const MachineInstr &Spill,
+                                     const MachineInstr &Reload) {
+    if (!IsFoldableCopy(Spill) || !IsFoldableCopy(Reload))
+      return false;
+    std::optional<DestSourcePair> SpillCopy =
+        isCopyInstr(Spill, *TII, UseCopyInstr);
+    std::optional<DestSourcePair> ReloadCopy =
+        isCopyInstr(Reload, *TII, UseCopyInstr);
+    if (!SpillCopy || !ReloadCopy)
+      return false;
+    return SpillCopy->Source->getReg() == ReloadCopy->Destination->getReg() &&
+           SpillCopy->Destination->getReg() == ReloadCopy->Source->getReg();
+  };
+
+  auto IsChainedCopy = [&, this](const MachineInstr &Prev,
+                                 const MachineInstr &Current) {
+    if (!IsFoldableCopy(Prev) || !IsFoldableCopy(Current))
+      return false;
+    std::optional<DestSourcePair> PrevCopy =
+        isCopyInstr(Prev, *TII, UseCopyInstr);
+    std::optional<DestSourcePair> CurrentCopy =
+        isCopyInstr(Current, *TII, UseCopyInstr);
+    if (!PrevCopy || !CurrentCopy)
+      return false;
+    return PrevCopy->Source->getReg() == CurrentCopy->Destination->getReg();
+  };
+
+  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+    std::optional<DestSourcePair> CopyOperands =
+        isCopyInstr(MI, *TII, UseCopyInstr);
+
+    // Update tracking information via this non-COPY instruction.
+    SmallSet<Register, 8> RegsToClobber;
+    if (!CopyOperands) {
+      for (const MachineOperand &MO : MI.operands()) {
+        if (!MO.isReg())
+          continue;
+        Register Reg = MO.getReg();
+        if (!Reg)
+          continue;
+        MachineInstr *LastUseCopy =
+            Tracker.findLastSeenUseInCopy(Reg.asMCReg(), *TRI);
+        if (LastUseCopy) {
+          LLVM_DEBUG(dbgs() << "MCP: Copy source of\n");
+          LLVM_DEBUG(LastUseCopy->dump());
+          LLVM_DEBUG(dbgs() << "might be invalidated by\n");
+          LLVM_DEBUG(MI.dump());
+          CopySourceInvalid.insert(LastUseCopy);
+        }
+        // Note that Tracker.clobberRegister(Reg, ...) removes the tracking of
+        // Reg, i.e., the COPY that defines Reg is removed from the mapping,
+        // and COPYs that use Reg are marked unavailable.
+        // We don't invoke CopyTracker::clobberRegister(Reg, ...) if Reg is
+        // not defined by a previous COPY, since we don't want to make COPYs
+        // that use Reg unavailable.
+        if (Tracker.findLastSeenDefInCopy(MI, Reg.asMCReg(), *TRI, *TII,
+                                          UseCopyInstr))
+          // Thus we can keep property#1.
+          RegsToClobber.insert(Reg);
+      }
+      for (Register Reg : RegsToClobber) {
+        Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+        LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Reg, TRI)
+                          << "\n");
+      }
+      continue;
+    }
+
+    Register Src = CopyOperands->Source->getReg();
+    Register Def = CopyOperands->Destination->getReg();
+    // Check if we can find a paired spill-reload copy.
+    LLVM_DEBUG(dbgs() << "MCP: Searching paired spill for reload: ");
+    LLVM_DEBUG(MI.dump());
+    MachineInstr *MaybeSpill =
+        Tracker.findLastSeenDefInCopy(MI, Src.asMCReg(), *TRI, *TII,
+                                      UseCopyInstr);
+    bool MaybeSpillIsChained = ChainLeader.count(MaybeSpill);
+    if (!MaybeSpillIsChained && MaybeSpill &&
+        IsSpillReloadPair(*MaybeSpill, MI)) {
+      // Now we have a spill-reload pair; check if we already have an
+      // existing chain:
+      //   L2: r2 = COPY r3
+      //   L5: r3 = COPY r2
+      // We look for a valid COPY before L5 which uses r3. There are several
+      // cases.
+      // Case #1:
+      // No COPY is found, which means r3 is defined or used between (L2,
+      // L5); we create a new chain for L2 and L5.
+      // Case #2:
+      //   L2: r2 = COPY r3
+      //   L5: r3 = COPY r2
+      // Such a COPY is found and it is L2; we create a new chain for L2 and
+      // L5.
+      // Case #3:
+      //   L2: r2 = COPY r3
+      //   L3: r1 = COPY r3
+      //   L5: r3 = COPY r2
+      // We create a new chain for L2 and L5.
+      // Case #4:
+      //   L2: r2 = COPY r3
+      //   L3: r1 = COPY r3
+      //   L4: r3 = COPY r1
+      //   L5: r3 = COPY r2
+      // Such a COPY won't be found since L4 defines r3. We create a new
+      // chain for L2 and L5.
+      // Case #5:
+      //   L2: r2 = COPY r3
+      //   L3: r3 = COPY r1
+      //   L4: r1 = COPY r3
+      //   L5: r3 = COPY r2
+      // The COPY found is L4, which belongs to an existing chain; we add L2
+      // and L5 to this chain.
+      LLVM_DEBUG(dbgs() << "MCP: Found spill: ");
+      LLVM_DEBUG(MaybeSpill->dump());
+      MachineInstr *MaybePrevReload =
+          Tracker.findLastSeenUseInCopy(Def.asMCReg(), *TRI);
+      auto Leader = ChainLeader.find(MaybePrevReload);
+      MachineInstr *L = nullptr;
+      if (Leader == ChainLeader.end() ||
+          (MaybePrevReload && !IsChainedCopy(*MaybePrevReload, MI))) {
+        L = &MI;
+        assert(!SpillChain.count(L) &&
+               "SpillChain should not have contained newly found chain");
+      } else {
+        assert(MaybePrevReload &&
+               "Found a valid leader through nullptr should not happen");
+        L = Leader->second;
+        assert(SpillChain[L].size() > 0 &&
+               "Existing chain's length should be larger than zero");
+      }
+      assert(!ChainLeader.count(&MI) && !ChainLeader.count(MaybeSpill) &&
+             "Newly found paired spill-reload should not belong to any chain "
+             "at this point");
+      ChainLeader.insert({MaybeSpill, L});
+      ChainLeader.insert({&MI, L});
+      SpillChain[L].push_back(MaybeSpill);
+      ReloadChain[L].push_back(&MI);
+      LLVM_DEBUG(dbgs() << "MCP: Chain " << L << " now is:\n");
+      LLVM_DEBUG(printSpillReloadChain(SpillChain, ReloadChain, L));
+    } else if (MaybeSpill && !MaybeSpillIsChained) {
+      // MaybeSpill is unable to pair with MI. That is to say, adding MI
+      // makes the chain invalid.
+      // The COPY that defines Src is no longer considered a candidate for a
+      // valid chain, since we expect the Def of a spill COPY not to be used
+      // by any COPY instruction until the paired reload COPY. For example:
+      //   L1: r1 = COPY r2
+      //   L2: r3 = COPY r1
+      // If we later have
+      //   L1: r1 = COPY r2
+      //   L2: r3 = COPY r1
+      //   L3: r2 = COPY r1
+      // L1 and L3 can't be a valid spill-reload pair. Thus we keep
+      // property#1.
+      LLVM_DEBUG(dbgs() << "MCP: Not paired spill-reload:\n");
+      LLVM_DEBUG(MaybeSpill->dump());
+      LLVM_DEBUG(MI.dump());
+      Tracker.clobberRegister(Src.asMCReg(), *TRI, *TII, UseCopyInstr);
+      LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Src, TRI)
+                        << "\n");
+    }
+    Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+  }
+
+  for (auto I = SpillChain.begin(), E = SpillChain.end(); I != E; ++I) {
+    auto &SC = I->second;
+    assert(ReloadChain.count(I->first) &&
+           "Reload chain of the same leader should exist");
+    auto &RC = ReloadChain[I->first];
+    TryFoldSpillageCopies(SC, RC);
+  }
+
+  MaybeDeadCopies.clear();
+  CopyDbgUsers.clear();
+  Tracker.clear();
+}
+
 bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
+  NumFunctionsSeen += 1;
+
   Changed = false;
 
   TRI = MF.getSubtarget().getRegisterInfo();
@@ -1019,6 +1387,7 @@
   MRI = &MF.getRegInfo();
 
   for (MachineBasicBlock &MBB : MF) {
+    EliminateSpillageCopies(MBB);
     BackwardCopyPropagateBlock(MBB);
     ForwardCopyPropagateBlock(MBB);
   }
diff --git a/llvm/test/CodeGen/PowerPC/mcp-elim-eviction-chain.mir b/llvm/test/CodeGen/PowerPC/mcp-elim-eviction-chain.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mcp-elim-eviction-chain.mir
@@ -0,0 +1,265 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O3 -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+# RUN:   -simplify-mir -run-pass=machine-cp %s -o - | FileCheck %s
+
+--- |
+  declare void @foo()
+
+  define void @test0() {
+  entry:
+    ret void
+  }
+
+  define void @test1() {
+  entry:
+    ret void
+  }
+
+  define void @test2() {
+  entry:
+    ret void
+  }
+
+  define void @test3() {
+  entry:
+    ret void
+  }
+
+  define void @test4() {
+  entry:
+    ret void
+  }
+
+  define void @test5() {
+  entry:
+    ret void
+  }
+
+  define void @test6() {
+  entry:
+    ret void
+  }
+
+...
+---
+name: test0
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x4, $x5, $x20, $x21, $x22
+    ; CHECK-LABEL: name: test0
+    ; CHECK: liveins: $x4, $x5, $x20, $x21, $x22
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: renamable $x24 = COPY $x4
+    ; CHECK-NEXT: $x23 = COPY renamable $x20
+    ; CHECK-NEXT: renamable $x20 = ADD8 $x4, $x5
+    ; CHECK-NEXT: renamable $x4 = COPY renamable $x20
+    ; CHECK-NEXT: renamable $x20 = COPY $x23
+    ; CHECK-NEXT: renamable $x23 = COPY renamable $x24
+    ; CHECK-NEXT: $x3 = COPY renamable $x4
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x20, implicit $x21, implicit $x22, implicit $x23
+    renamable $x23 = COPY renamable $x4
+    renamable $x24 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x22
+    renamable $x22 = COPY renamable $x21
+    renamable $x21 = COPY renamable $x20
+    renamable $x20 = ADD8 $x4, $x5
+    renamable $x4 = COPY renamable $x20
+    renamable $x20 = COPY renamable $x21
+    renamable $x21 = COPY renamable $x22
+    renamable $x22 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x24
+    $x3 = COPY renamable $x4
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x20, implicit $x21, implicit $x22, implicit $x23
+
+...
+
+# Duplicated pairs.
+---
+name: test1
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x20, $x21, $x22, $x23
+    ; CHECK-LABEL: name: test1
+    ; CHECK: liveins: $x3, $x20, $x21, $x22, $x23
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: renamable $x24 = COPY $x3
+    ; CHECK-NEXT: renamable $x23 = COPY renamable $x22
+    ; CHECK-NEXT: renamable $x22 = COPY renamable $x21
+    ; CHECK-NEXT: renamable $x21 = COPY renamable $x20
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24
+    renamable $x23 = COPY $x3
+    renamable $x24 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x22
+    renamable $x22 = COPY renamable $x21
+    renamable $x21 = COPY renamable $x20
+    renamable $x20 = COPY renamable $x21
+    renamable $x21 = COPY renamable $x22
+    renamable $x22 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x24
+    renamable $x24 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x22
+    renamable $x22 = COPY renamable $x21
+    renamable $x21 = COPY renamable $x20
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24
+
+...
+
+# One chain directly after another.
+---
+name: test2
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x18, $x19, $x20, $x21, $x22, $x23, $x24
+    ; CHECK-LABEL: name: test2
+    ; CHECK: liveins: $x3, $x18, $x19, $x20, $x21, $x22, $x23, $x24
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: renamable $x21 = COPY renamable $x20
+    ; CHECK-NEXT: renamable $x20 = COPY renamable $x21
+    ; CHECK-NEXT: renamable $x25 = COPY renamable $x24
+    ; CHECK-NEXT: renamable $x24 = COPY renamable $x25
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x18, implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25
+    renamable $x21 = COPY renamable $x20
+    renamable $x20 = COPY renamable $x19
+    renamable $x19 = COPY renamable $x18
+    renamable $x18 = COPY renamable $x19
+    renamable $x19 = COPY renamable $x20
+    renamable $x20 = COPY renamable $x21
+    renamable $x25 = COPY renamable $x24
+    renamable $x24 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x22
+    renamable $x22 = COPY renamable $x23
+    renamable $x23 = COPY renamable $x24
+    renamable $x24 = COPY renamable $x25
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x18, implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25
+
+...
+
+# Reorder the code of test2 so that two chains are being built at the same
+# time.
+---
+name: test3
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x18, $x19, $x20, $x21, $x22, $x23, $x24
+    ; CHECK-LABEL: name: test3
+    ; CHECK: liveins: $x3, $x18, $x19, $x20, $x21, $x22, $x23, $x24
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: renamable $x21 = COPY renamable $x20
+    ; CHECK-NEXT: renamable $x25 = COPY renamable $x24
+    ; CHECK-NEXT: renamable $x20 = COPY renamable $x21
+    ; CHECK-NEXT: renamable $x24 = COPY renamable $x25
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x18, implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25
+    renamable $x21 = COPY renamable $x20
+    renamable $x25 = COPY renamable $x24
+    renamable $x20 = COPY renamable $x19
+    renamable $x24 = COPY renamable $x23
+    renamable $x19 = COPY renamable $x18
+    renamable $x23 = COPY renamable $x22
+    renamable $x18 = COPY renamable $x19
+    renamable $x22 = COPY renamable $x23
+    renamable $x19 = COPY renamable $x20
+    renamable $x23 = COPY renamable $x24
+    renamable $x20 = COPY renamable $x21
+    renamable $x24 = COPY renamable $x25
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x18, implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25
+
+...
+
+---
+name: test4
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    ; CHECK-LABEL: name: test4
+    ; CHECK: liveins: $x3, $x4, $x5
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
+    renamable $x5 = COPY renamable $x3
+    renamable $x4 = COPY renamable $x3
+    renamable $x2 = COPY renamable $x3
+    renamable $x3 = COPY renamable $x2
+    renamable $x3 = COPY renamable $x4
+    renamable $x3 = COPY renamable $x5
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3
+
+...
+
+# Chain across a regmask.
+---
+name: test5
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x17, $x16, $x15, $x14, $x3
+    ; CHECK-LABEL: name: test5
+    ; CHECK: liveins: $x17, $x16, $x15, $x14, $x3
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: renamable $x18 = COPY renamable $x17
+    ; CHECK-NEXT: $x17 = COPY renamable $x3
+    ; CHECK-NEXT: BL8_NOP @foo, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit-def $x3, implicit $x3
+    ; CHECK-NEXT: renamable $x3 = COPY $x17
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3
+    renamable $x18 = COPY renamable $x17
+    renamable $x17 = COPY renamable $x16
+    renamable $x16 = COPY renamable $x15
+    renamable $x15 = COPY renamable $x14
+    renamable $x14 = COPY renamable $x3
+    BL8_NOP @foo, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit-def $x3, implicit $x3
+    renamable $x3 = COPY renamable $x14
+    renamable $x14 = COPY renamable $x15
+    renamable $x15 = COPY renamable $x16
+    renamable $x16 = COPY renamable $x17
+    renamable $x17 = COPY renamable $x18
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3
+
+...
+
+# Two chains across a regmask.
+---
+name: test6
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x20, $x19, $x17, $x16, $x15, $x14, $x3, $x4
+    ; CHECK-LABEL: name: test6
+    ; CHECK: liveins: $x20, $x19, $x17, $x16, $x15, $x14, $x3, $x4
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: renamable $x21 = COPY renamable $x20
+    ; CHECK-NEXT: renamable $x18 = COPY renamable $x17
+    ; CHECK-NEXT: $x17 = COPY renamable $x3
+    ; CHECK-NEXT: $x20 = COPY renamable $x4
+    ; CHECK-NEXT: BL8_NOP @foo, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit-def $x3, implicit $x3, implicit-def $x4, implicit $x4
+    ; CHECK-NEXT: renamable $x3 = COPY $x17
+    ; CHECK-NEXT: renamable $x4 = COPY $x20
+    ; CHECK-NEXT: BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x4
+    renamable $x21 = COPY renamable $x20
+    renamable $x18 = COPY renamable $x17
+    renamable $x17 = COPY renamable $x16
+    renamable $x16 = COPY renamable $x15
+    renamable $x20 = COPY renamable $x19
+    renamable $x15 = COPY renamable $x14
+    renamable $x14 = COPY renamable $x3
+    renamable $x19 = COPY renamable $x4
+    BL8_NOP @foo, csr_ppc64, implicit-def dead $lr8, implicit $rm, implicit-def $x3, implicit $x3, implicit-def $x4, implicit $x4
+    renamable $x3 = COPY renamable $x14
+    renamable $x14 = COPY renamable $x15
+    renamable $x4 = COPY renamable $x19
+    renamable $x15 = COPY renamable $x16
+    renamable $x19 = COPY renamable $x20
+    renamable $x16 = COPY renamable $x17
+    renamable $x20 = COPY renamable $x21
+    renamable $x17 = COPY renamable $x18
+    BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x4
+
+...
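To make the rewrite concrete, the following standalone sketch replays the folding step of TryFoldSpillageCopies on the r0..r4 example from the comment in EliminateSpillageCopies. The Copy struct and foldChain are hypothetical stand-ins for MachineInstr and the UpdateReg/eraseFromParent calls, not part of the patch or of any LLVM API; only the index arithmetic (innermost pair at index 0, outermost pair kept, middle pairs erased) mirrors the pass.

  // Toy model of a spill-reload chain fold; illustrative only.
  #include <cstdio>
  #include <vector>

  struct Copy {
    int Def, Src;        // toy register numbers standing in for MCRegister
    bool Erased = false; // stands in for eraseFromParent()
  };

  // SC and RC hold the spill and reload copies of one chain, innermost pair
  // first, like the SpillChain/ReloadChain vectors in the pass.
  static void foldChain(std::vector<Copy> &SC, std::vector<Copy> &RC) {
    if (SC.size() <= 2) // the pass needs at least 3 pairs to shorten anything
      return;
    // Mirrors UpdateReg(SC[0], InnerMostSpillCopy->Destination,
    //                   OuterMostSpillCopy->Source).
    SC.front().Def = SC.back().Src;
    // Mirrors UpdateReg(RC[0], InnerMostReloadCopy->Source,
    //                   OuterMostReloadCopy->Destination).
    RC.front().Src = RC.back().Def;
    for (size_t I = 1; I + 1 < SC.size(); ++I) // erase the middle pairs
      SC[I].Erased = RC[I].Erased = true;
  }

  int main() {
    // The r0..r4 example, innermost pair first: spills r3=r4, r2=r3, r1=r2,
    // r0=r1; reloads r4=r3, r3=r2, r2=r1, r1=r0.
    std::vector<Copy> SC{{3, 4}, {2, 3}, {1, 2}, {0, 1}};
    std::vector<Copy> RC{{4, 3}, {3, 2}, {2, 1}, {1, 0}};
    foldChain(SC, RC);
    for (auto I = SC.rbegin(); I != SC.rend(); ++I) // spills in program order
      if (!I->Erased)
        std::printf("r%d = COPY r%d\n", I->Def, I->Src);
    std::puts("<def-use r4>");
    for (const Copy &C : RC)
      if (!C.Erased)
        std::printf("r%d = COPY r%d\n", C.Def, C.Src);
    return 0;
  }

Running it prints r0 = COPY r1, r1 = COPY r4, <def-use r4>, r4 = COPY r1, r1 = COPY r0, which is the folded form promised by the comment and the same shape the CHECK lines of test0 above verify with PPC registers.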
+
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ALL,RV64I
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ALL,RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV64I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV32I
 
 define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-LABEL: lshr_4bytes:
@@ -3924,17 +3924,11 @@
 ; RV32I-NEXT:  .LBB11_57:
 ; RV32I-NEXT:    slti s8, s5, 0
 ; RV32I-NEXT:    mv t1, a4
-; RV32I-NEXT:    mv a4, t2
-; RV32I-NEXT:    mv t2, s6
-; RV32I-NEXT:    mv s6, s1
-; RV32I-NEXT:    mv s1, ra
+; RV32I-NEXT:    mv a4, ra
 ; RV32I-NEXT:    srl ra, a3, s3
 ; RV32I-NEXT:    neg s8, s8
 ; RV32I-NEXT:    and s8, s8, ra
-; RV32I-NEXT:    mv ra, s1
-; RV32I-NEXT:    mv s1, s6
-; RV32I-NEXT:    mv s6, t2
-; RV32I-NEXT:    mv t2, a4
+; RV32I-NEXT:    mv ra, a4
 ; RV32I-NEXT:    mv a4, t1
 ; RV32I-NEXT:    li t1, 64
 ; RV32I-NEXT:    or s8, s8, s7
@@ -4202,5 +4196,3 @@
   store i256 %res, ptr %dst, align 1
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; ALL: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ALL,RV64I
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ALL,RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV64I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV32I
 
 define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-LABEL: lshr_4bytes:
@@ -3889,15 +3889,11 @@
 ; RV32I-NEXT:  .LBB11_57:
 ; RV32I-NEXT:    slti s8, s5, 0
 ; RV32I-NEXT:    mv t1, t2
-; RV32I-NEXT:    mv t2, s6
-; RV32I-NEXT:    mv s6, s1
-; RV32I-NEXT:    mv s1, ra
+; RV32I-NEXT:    mv t2, ra
 ; RV32I-NEXT:    srl ra, a3, s3
 ; RV32I-NEXT:    neg s8, s8
 ; RV32I-NEXT:    and s8, s8, ra
-; RV32I-NEXT:    mv ra, s1
-; RV32I-NEXT:    mv s1, s6
-; RV32I-NEXT:    mv s6, t2
+; RV32I-NEXT:    mv ra, t2
 ; RV32I-NEXT:    mv t2, t1
 ; RV32I-NEXT:    li t1, 64
 ; RV32I-NEXT:    or s8, s8, s7
@@ -4164,5 +4160,3 @@
   store i256 %res, ptr %dst, align 1
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; ALL: {{.*}}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll
@@ -68,17 +68,13 @@
 ; CHECK-NEXT:    vshl.i16 q2, q0, #3
 ; CHECK-NEXT:    vand q3, q1, q5
 ; CHECK-NEXT:    vmov q1, q7
-; CHECK-NEXT:    vand q2, q2, q6
-; CHECK-NEXT:    vmov q7, q6
-; CHECK-NEXT:    vmov q6, q5
-; CHECK-NEXT:    vmov q5, q4
+; CHECK-NEXT:    vmov q7, q4
 ; CHECK-NEXT:    vldrw.u32 q4, [sp, #48] @ 16-byte Reload
+; CHECK-NEXT:    vand q2, q2, q6
 ; CHECK-NEXT:    vshr.u16 q0, q0, #9
 ; CHECK-NEXT:    vmla.i16 q4, q2, r2
 ; CHECK-NEXT:    vshr.u16 q2, q4, #11
-; CHECK-NEXT:    vmov q4, q5
-; CHECK-NEXT:    vmov q5, q6
-; CHECK-NEXT:    vmov q6, q7
+; CHECK-NEXT:    vmov q4, q7
 ; CHECK-NEXT:    vmov q7, q1
 ; CHECK-NEXT:    vorr q1, q3, q2
 ; CHECK-NEXT:    vldrw.u32 q2, [sp, #16] @ 16-byte Reload
diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll
@@ -1123,13 +1123,10 @@
 ; CHECK-NEXT:    vldrwt.u32 q0, [r11]
 ; CHECK-NEXT:    vstrw.32 q6, [sp, #40] @ 16-byte Spill
 ; CHECK-NEXT:    vmov q6, q5
-; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vpstt
 ; CHECK-NEXT:    vfmat.f32 q1, q0, q7
-; CHECK-NEXT:    vmov q5, q4
-; CHECK-NEXT:    vmov q4, q3
-; CHECK-NEXT:    vmov q3, q1
-; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vldrwt.u32 q0, [r6]
+; CHECK-NEXT:    vmov q5, q1
 ; CHECK-NEXT:    vldrw.u32 q1, [sp, #56] @ 16-byte Reload
 ; CHECK-NEXT:    adds r7, r6, r5
 ; CHECK-NEXT:    vpstt
@@ -1137,13 +1134,11 @@
 ; CHECK-NEXT:    vfmat.f32 q2, q0, q7
 ; CHECK-NEXT:    vldrwt.u32 q0, [r7]
 ; CHECK-NEXT:    adds r6, r7, r5
 ; CHECK-NEXT:    vstrw.32 q1, [sp, #56] @ 16-byte Spill
-; CHECK-NEXT:    vmov q1, q3
-; CHECK-NEXT:    vmov q3, q4
 ; CHECK-NEXT:    vpstt
 ; CHECK-NEXT:    vfmat.f32 q3, q0, q7
 ; CHECK-NEXT:    vldrwt.u32 q0, [r6]
-; CHECK-NEXT:    vmov q4, q5
 ; CHECK-NEXT:    adds r7, r6, r5
+; CHECK-NEXT:    vmov q1, q5
 ; CHECK-NEXT:    vpstt
 ; CHECK-NEXT:    vfmat.f32 q4, q0, q7
 ; CHECK-NEXT:    vldrwt.u32 q0, [r7]
@@ -1412,9 +1407,7 @@
 ; CHECK-NEXT:    vmov q6, q5
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vfmat.f32 q7, q1, q0
-; CHECK-NEXT:    vmov q5, q3
-; CHECK-NEXT:    vmov q3, q4
-; CHECK-NEXT:    vmov q4, q2
+; CHECK-NEXT:    vmov q5, q2
 ; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vldrwt.u32 q1, [r6]
 ; CHECK-NEXT:    vldrw.u32 q2, [sp, #56] @ 16-byte Reload
@@ -1430,23 +1423,20 @@
 ; CHECK-NEXT:    vldrwt.u32 q1, [r6]
 ; CHECK-NEXT:    adds r7, r6, r5
 ; CHECK-NEXT:    vstrw.32 q2, [sp, #72] @ 16-byte Spill
-; CHECK-NEXT:    vmov q2, q4
-; CHECK-NEXT:    vmov q4, q3
+; CHECK-NEXT:    vmov q2, q5
+; CHECK-NEXT:    adds r6, r7, r5
 ; CHECK-NEXT:    vpstt
 ; CHECK-NEXT:    vfmat.f32 q2, q1, q0
 ; CHECK-NEXT:    vldrwt.u32 q1, [r7]
-; CHECK-NEXT:    adds r6, r7, r5
-; CHECK-NEXT:    vmov q3, q5
+; CHECK-NEXT:    vmov q5, q6
+; CHECK-NEXT:    vldrw.u32 q6, [sp, #40] @ 16-byte Reload
 ; CHECK-NEXT:    vpstt
 ; CHECK-NEXT:    vfmat.f32 q4, q1, q0
 ; CHECK-NEXT:    vldrwt.u32 q1, [r6]
-; CHECK-NEXT:    vmov q5, q6
 ; CHECK-NEXT:    add r6, r5
-; CHECK-NEXT:    vpstt
+; CHECK-NEXT:    vpsttt
 ; CHECK-NEXT:    vfmat.f32 q5, q1, q0
 ; CHECK-NEXT:    vldrwt.u32 q1, [r6]
-; CHECK-NEXT:    vldrw.u32 q6, [sp, #40] @ 16-byte Reload
-; CHECK-NEXT:    vpst
 ; CHECK-NEXT:    vfmat.f32 q3, q1, q0
 ; CHECK-NEXT:    le lr, .LBB7_3
 ; CHECK-NEXT:  @ %bb.4: @ %middle.block
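The chain-building half of the algorithm rests on two small predicates. Below is a hedged sketch using a variant of the toy Copy type from the earlier sketch; it is illustrative only, since the real pass compares MachineOperands obtained through isCopyInstr rather than plain integers.

  // Toy versions of the IsSpillReloadPair / IsChainedCopy predicates.
  struct Copy {
    int Def, Src; // toy register numbers
  };

  // A spill (a = COPY b) pairs with the reload (b = COPY a): the two
  // registers swap roles, mirroring IsSpillReloadPair in the pass.
  static bool isSpillReloadPair(const Copy &Spill, const Copy &Reload) {
    return Spill.Src == Reload.Def && Spill.Def == Reload.Src;
  }

  // A reload extends an existing chain when the previous reload's Source is
  // the new reload's Destination, mirroring IsChainedCopy.
  static bool isChainedCopy(const Copy &Prev, const Copy &Current) {
    return Prev.Src == Current.Def;
  }

  int main() {
    // Innermost pair from the r0..r4 example: spill r3 = COPY r4 pairs with
    // reload r4 = COPY r3.
    Copy Spill{3, 4}, Reload{4, 3};
    // The next reload outward, r3 = COPY r2, chains onto the previous one:
    // the previous reload's Source (r3) is the new reload's Destination.
    Copy NextReload{3, 2};
    return isSpillReloadPair(Spill, Reload) &&
                   isChainedCopy(Reload, NextReload)
               ? 0
               : 1;
  }

IsSpillReloadPair is symmetric in the two registers, which is why the pass can treat the last seen def-COPY of the reload's Source as the spill candidate; IsChainedCopy compares only one register, since consecutive reloads in a chain step outward one register at a time.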