diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -432,6 +432,15 @@ if (hasImplicitOverlap(MI, MOUse)) continue; + // Check that the instruction is not a copy that partially overwrites the + // original copy source that we are about to use. The tracker mechanism + // cannot cope with that. + if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) && + !MI.definesRegister(CopySrcReg)) { + LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); + continue; + } + if (!DebugCounter::shouldExecute(FwdCounter)) { LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n " << MI); diff --git a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir @@ -0,0 +1,27 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 %s -o - -run-pass machine-cp -verify-machineinstrs | FileCheck %s +# +# The MachineCopyPropagation bug being tested propagates s[60:67] into the copy +# into s[56:63], and then uses s[60:67] in the following +# IMAGE_SAMPLE_V3_V2_gfx10, even though it has just overwritten half of it. + +# CHECK-LABEL: name: _amdgpu_ps_main +# CHECK-NOT: IMAGE_SAMPLE_V3_V2_gfx10 {{.*}} $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 +# CHECK: IMAGE_SAMPLE_V3_V2_gfx10 {{.*}} $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + +--- +name: _amdgpu_ps_main +body: | + bb.0: + successors: + liveins: $sgpr2, $sgpr3, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr70, $vgpr71 + + renamable $sgpr8_sgpr9 = S_GETPC_B64 + renamable $sgpr8 = COPY killed renamable $sgpr2 + renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0, 0 :: (invariant load 32, align 16, addrspace 4) + renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 + renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec + renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 12, align 16) + S_ENDPGM 0 + +...