diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -109,6 +109,7 @@ struct CopyInfo { MachineInstr *MI, *LastSeenUseInCopy; SmallVector DefRegs; + SmallVector SrcRegs; bool Avail; }; @@ -151,6 +152,8 @@ } RegsToInvalidate.insert(I->second.DefRegs.begin(), I->second.DefRegs.end()); + RegsToInvalidate.insert(I->second.SrcRegs.begin(), + I->second.SrcRegs.end()); } } for (MCRegister InvalidReg : RegsToInvalidate) @@ -167,6 +170,7 @@ // When we clobber the source of a copy, we need to clobber everything // it defined. markRegsUnavailable(I->second.DefRegs, TRI); + markRegsUnavailable(I->second.SrcRegs, TRI); // When we clobber the destination of a copy, we need to clobber the // whole register it defined. if (MachineInstr *MI = I->second.MI) { @@ -193,15 +197,17 @@ // Remember Def is defined by the copy. for (MCRegUnit Unit : TRI.regunits(Def)) - Copies[Unit] = {MI, nullptr, {}, true}; + Copies[Unit] = {MI, nullptr, {}, {}, true}; // Remember source that's copied to Def. Once it's clobbered, then // it's no longer available for copy propagation. for (MCRegUnit Unit : TRI.regunits(Src)) { - auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}}); + auto I = Copies.insert({Unit, {nullptr, nullptr, {}, {}, false}}); auto &Copy = I.first->second; if (!is_contained(Copy.DefRegs, Def)) Copy.DefRegs.push_back(Def); + if (!is_contained(Copy.SrcRegs, Src)) + Copy.SrcRegs.push_back(Src); Copy.LastSeenUseInCopy = MI; } } diff --git a/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir b/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mcp-use-before-def.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=machine-cp -o - %s | FileCheck %s + +# machine copy prop should not introduce use before def +--- +name: back_copy_block +body: | + bb.0: + ; CHECK-LABEL: name: back_copy_block + ; CHECK: $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_e64 killed $vgpr20_vgpr21, implicit $exec + ; CHECK-NEXT: renamable $vgpr20 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + ; CHECK-NEXT: renamable $vgpr6_vgpr7_vgpr8_vgpr9 = COPY renamable $vgpr10_vgpr11_vgpr12_vgpr13 + ; CHECK-NEXT: renamable $vgpr20 = V_MOV_B32_e32 killed $vgpr6, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, amdgpu_allvgprs + $vgpr20_vgpr21_vgpr22_vgpr23 = IMPLICIT_DEF + $vgpr10_vgpr11_vgpr12_vgpr13 = IMPLICIT_DEF + renamable $vgpr0_vgpr1 = V_MOV_B64_e64 killed renamable $vgpr20_vgpr21, implicit $exec + renamable $vgpr20 = V_MOV_B32_e32 killed renamable $vgpr1, implicit $exec + renamable $vgpr6_vgpr7_vgpr8_vgpr9 = COPY killed renamable $vgpr10_vgpr11_vgpr12_vgpr13 + renamable $vgpr14_vgpr15 = COPY killed renamable $vgpr0_vgpr1 + renamable $vgpr20 = V_MOV_B32_e32 killed renamable $vgpr6, implicit $exec + renamable $vgpr1_vgpr2_vgpr3_vgpr4 = COPY killed renamable $vgpr6_vgpr7_vgpr8_vgpr9 + S_ENDPGM 0, amdgpu_allvgprs +... +## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +# CHECK: {{.*}}