diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -108,7 +108,7 @@ class CopyTracker { struct CopyInfo { MachineInstr *MI, *LastSeenUseInCopy; - SmallVector DefRegs; + SmallVector DefRegs; bool Avail; }; @@ -117,15 +117,13 @@ public: /// Mark all of the given registers and their subregisters as unavailable for /// copying. - void markRegsUnavailable(ArrayRef Regs, + void markRegsUnavailable(ArrayRef Regs, const TargetRegisterInfo &TRI) { - for (MCRegister Reg : Regs) { - // Source of copy is no longer available for propagation. - for (MCRegUnit Unit : TRI.regunits(Reg)) { - auto CI = Copies.find(Unit); - if (CI != Copies.end()) - CI->second.Avail = false; - } + // Source of copy is no longer available for propagation. + for (MCRegUnit Unit : Regs) { + auto CI = Copies.find(Unit); + if (CI != Copies.end()) + CI->second.Avail = false; } } @@ -134,28 +132,31 @@ const TargetInstrInfo &TII, bool UseCopyInstr) { // Since Reg might be a subreg of some registers, only invalidate Reg is not // enough. We have to find the COPY defines Reg or registers defined by Reg - // and invalidate all of them. - SmallSet RegsToInvalidate; - RegsToInvalidate.insert(Reg); + // and invalidate all of them. Similarly, we must invalidate all of the + // the subregisters used in the source of the COPY. + SmallSet RegsToInvalidate; + auto InvalidateCopy = [&](MachineInstr *MI) { + std::optional CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); + assert(CopyOperands && "Expect copy"); + + auto Dest = TRI.regunits(CopyOperands->Destination->getReg().asMCReg()); + auto Src = TRI.regunits(CopyOperands->Source->getReg().asMCReg()); + RegsToInvalidate.insert(Dest.begin(), Dest.end()); + RegsToInvalidate.insert(Src.begin(), Src.end()); + }; + for (MCRegUnit Unit : TRI.regunits(Reg)) { auto I = Copies.find(Unit); if (I != Copies.end()) { - if (MachineInstr *MI = I->second.MI) { - std::optional CopyOperands = - isCopyInstr(*MI, TII, UseCopyInstr); - assert(CopyOperands && "Expect copy"); - - RegsToInvalidate.insert( - CopyOperands->Destination->getReg().asMCReg()); - RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg()); - } - RegsToInvalidate.insert(I->second.DefRegs.begin(), - I->second.DefRegs.end()); + if (MachineInstr *MI = I->second.MI) + InvalidateCopy(MI); + if (MachineInstr *MI = I->second.LastSeenUseInCopy) + InvalidateCopy(MI); } } - for (MCRegister InvalidReg : RegsToInvalidate) - for (MCRegUnit Unit : TRI.regunits(InvalidReg)) - Copies.erase(Unit); + for (MCRegUnit Unit : RegsToInvalidate) + Copies.erase(Unit); } /// Clobber a single register, removing it from the tracker's copy maps. @@ -166,14 +167,19 @@ if (I != Copies.end()) { // When we clobber the source of a copy, we need to clobber everything // it defined. - markRegsUnavailable(I->second.DefRegs, TRI); + SmallVector Dest(I->second.DefRegs.begin(), + I->second.DefRegs.end()); + markRegsUnavailable(Dest, TRI); + // When we clobber the destination of a copy, we need to clobber the // whole register it defined. if (MachineInstr *MI = I->second.MI) { std::optional CopyOperands = isCopyInstr(*MI, TII, UseCopyInstr); - markRegsUnavailable({CopyOperands->Destination->getReg().asMCReg()}, - TRI); + auto DestUnits = + TRI.regunits(CopyOperands->Destination->getReg().asMCReg()); + SmallVector Dest(DestUnits.begin(), DestUnits.end()); + markRegsUnavailable(Dest, TRI); } // Now we can erase the copy. Copies.erase(I); @@ -188,20 +194,22 @@ isCopyInstr(*MI, TII, UseCopyInstr); assert(CopyOperands && "Tracking non-copy?"); - MCRegister Src = CopyOperands->Source->getReg().asMCReg(); - MCRegister Def = CopyOperands->Destination->getReg().asMCReg(); - + auto Src = TRI.regunits(CopyOperands->Source->getReg().asMCReg()); + auto Def = TRI.regunits(CopyOperands->Destination->getReg().asMCReg()); // Remember Def is defined by the copy. - for (MCRegUnit Unit : TRI.regunits(Def)) + for (MCRegUnit Unit : Def) Copies[Unit] = {MI, nullptr, {}, true}; // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. - for (MCRegUnit Unit : TRI.regunits(Src)) { + for (MCRegUnit Unit : Src) { auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}}); auto &Copy = I.first->second; - if (!is_contained(Copy.DefRegs, Def)) - Copy.DefRegs.push_back(Def); + std::for_each(Def.begin(), Def.end(), [&Copy](MCRegUnit DefUnit) { + if (!is_contained(Copy.DefRegs, DefUnit)) + Copy.DefRegs.push_back(DefUnit); + }); + Copy.LastSeenUseInCopy = MI; } } @@ -210,7 +218,7 @@ return !Copies.empty(); } - MachineInstr *findCopyForUnit(MCRegister RegUnit, + MachineInstr *findCopyForUnit(MCRegUnit RegUnit, const TargetRegisterInfo &TRI, bool MustBeAvailable = false) { auto CI = Copies.find(RegUnit); @@ -221,15 +229,21 @@ return CI->second.MI; } - MachineInstr *findCopyDefViaUnit(MCRegister RegUnit, + MachineInstr *findCopyDefViaUnit(MCRegUnit RegUnit, const TargetRegisterInfo &TRI) { auto CI = Copies.find(RegUnit); if (CI == Copies.end()) return nullptr; - if (CI->second.DefRegs.size() != 1) + if (!CI->second.DefRegs.size()) return nullptr; - MCRegUnit RU = *TRI.regunits(CI->second.DefRegs[0]).begin(); - return findCopyForUnit(RU, TRI, true); + MCRegUnit RU = CI->second.DefRegs[0]; + auto CandidateCopy = findCopyForUnit(RU, TRI, true); + for (size_t I = 1; I < CI->second.DefRegs.size(); I++) { + MCRegUnit NextRU = CI->second.DefRegs[I]; + if (findCopyForUnit(NextRU, TRI, true) != CandidateCopy) + return nullptr; + } + return CandidateCopy; } MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg, diff --git a/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir b/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-cp -o - %s | FileCheck %s + +# machine copy prop should not introduce use before def +--- +name: back_copy_block +body: | + bb.0: + ; CHECK-LABEL: name: back_copy_block + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_e64 killed $vgpr20_vgpr21, implicit $exec + ; CHECK-NEXT: renamable $vgpr20 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + ; CHECK-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = COPY renamable $vgpr10_vgpr11_vgpr12_vgpr13 + ; CHECK-NEXT: renamable $vgpr20 = V_MOV_B32_e32 killed $vgpr6, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, amdgpu_allvgprs + $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF + $vgpr10_vgpr11_vgpr12_vgpr13 = IMPLICIT_DEF + renamable $vgpr0_vgpr1 = V_MOV_B64_e64 killed renamable $vgpr20_vgpr21, implicit $exec + renamable $vgpr20 = V_MOV_B32_e32 killed renamable $vgpr1, implicit $exec + renamable $vgpr6_vgpr7_vgpr8_vgpr9 = COPY killed renamable $vgpr10_vgpr11_vgpr12_vgpr13 + renamable $vgpr14_vgpr15 = COPY killed renamable $vgpr0_vgpr1 + renamable $vgpr20 = V_MOV_B32_e32 killed renamable $vgpr6, implicit $exec + renamable $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed renamable $vgpr6_vgpr7_vgpr8_vgpr9 + S_ENDPGM 0, amdgpu_allvgprs +...