diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -134,28 +134,31 @@ const TargetInstrInfo &TII, bool UseCopyInstr) { // Since Reg might be a subreg of some registers, only invalidate Reg is not // enough. We have to find the COPY defines Reg or registers defined by Reg - // and invalidate all of them. - SmallSet RegsToInvalidate; - RegsToInvalidate.insert(Reg); + // and invalidate all of them. Similarly, we must invalidate all of + // the subregisters used in the source of the COPY. + SmallSet RegUnitsToInvalidate; + auto InvalidateCopy = [&](MachineInstr *MI) { + std::optional CopyOperands = + isCopyInstr(*MI, TII, UseCopyInstr); + assert(CopyOperands && "Expect copy"); + + auto Dest = TRI.regunits(CopyOperands->Destination->getReg().asMCReg()); + auto Src = TRI.regunits(CopyOperands->Source->getReg().asMCReg()); + RegUnitsToInvalidate.insert(Dest.begin(), Dest.end()); + RegUnitsToInvalidate.insert(Src.begin(), Src.end()); + }; + for (MCRegUnit Unit : TRI.regunits(Reg)) { auto I = Copies.find(Unit); if (I != Copies.end()) { - if (MachineInstr *MI = I->second.MI) { - std::optional CopyOperands = - isCopyInstr(*MI, TII, UseCopyInstr); - assert(CopyOperands && "Expect copy"); - - RegsToInvalidate.insert( - CopyOperands->Destination->getReg().asMCReg()); - RegsToInvalidate.insert(CopyOperands->Source->getReg().asMCReg()); - } - RegsToInvalidate.insert(I->second.DefRegs.begin(), - I->second.DefRegs.end()); + if (MachineInstr *MI = I->second.MI) + InvalidateCopy(MI); + if (MachineInstr *MI = I->second.LastSeenUseInCopy) + InvalidateCopy(MI); } } - for (MCRegister InvalidReg : RegsToInvalidate) - for (MCRegUnit Unit : TRI.regunits(InvalidReg)) - Copies.erase(Unit); + for (MCRegUnit Unit : RegUnitsToInvalidate) + Copies.erase(Unit); } /// Clobber a single register, removing it from the tracker's copy maps. 
diff --git a/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir b/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir @@ -0,0 +1,26 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-cp -o - %s | FileCheck %s + +# machine copy prop should not introduce use before def +--- +name: back_copy_block +body: | + bb.0: + ; CHECK-LABEL: name: back_copy_block + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_e64 killed $vgpr20_vgpr21, implicit $exec + ; CHECK-NEXT: renamable $vgpr20 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + ; CHECK-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = COPY renamable $vgpr10_vgpr11_vgpr12_vgpr13 + ; CHECK-NEXT: renamable $vgpr20 = V_MOV_B32_e32 killed $vgpr6, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, amdgpu_allvgprs + $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF + $vgpr10_vgpr11_vgpr12_vgpr13 = IMPLICIT_DEF + renamable $vgpr0_vgpr1 = V_MOV_B64_e64 killed renamable $vgpr20_vgpr21, implicit $exec + renamable $vgpr20 = V_MOV_B32_e32 killed renamable $vgpr1, implicit $exec + renamable $vgpr6_vgpr7_vgpr8_vgpr9 = COPY killed renamable $vgpr10_vgpr11_vgpr12_vgpr13 + renamable $vgpr14_vgpr15 = COPY killed renamable $vgpr0_vgpr1 + renamable $vgpr20 = V_MOV_B32_e32 killed renamable $vgpr6, implicit $exec + renamable $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed renamable $vgpr6_vgpr7_vgpr8_vgpr9 + S_ENDPGM 0, amdgpu_allvgprs +...