Index: llvm/lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- llvm/lib/CodeGen/RegisterCoalescer.cpp +++ llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2103,6 +2103,7 @@ LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) << ")\n"); LIS->shrinkToUses(S, LI.reg()); + ShrinkMainRange = true; } LI.removeEmptySubRanges(); } Index: llvm/test/CodeGen/AMDGPU/coalesce-into-dead-subreg-copies.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/coalesce-into-dead-subreg-copies.mir @@ -0,0 +1,33 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-coalescing -run-pass=simple-register-coalescing -o - %s | FileCheck %s + +# Check that there's no "Live segment doesn't end at a valid +# instruction" failure after coalescing %0 into %2, which is +# ultimately a pair of dead copies. + +--- +name: coalesce_into_dead_subreg_copy +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 8 +body: | + ; CHECK-LABEL: name: coalesce_into_dead_subreg_copy + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead %2:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sgpr_64, 24, 0 :: (dereferenceable invariant load (s64), addrspace 4) + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + bb.0: + %0:sreg_64_xexec = S_LOAD_DWORDX2_IMM undef %1:sgpr_64, 24, 0 :: (dereferenceable invariant load (s64), addrspace 4) + undef %2.sub0:sreg_64 = COPY %0.sub0:sreg_64_xexec + %2.sub1:sreg_64 = COPY killed %0.sub1:sreg_64_xexec + S_BRANCH %bb.1 + + bb.1: + +... Index: llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll +++ llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll @@ -10,7 +10,7 @@ ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_add_f32_e32 v3, v2, v2 -; CHECK-NEXT: v_add_f32_e32 v1, v1, v1 +; CHECK-NEXT: ; kill: killed $vgpr1 ; CHECK-NEXT: v_add_f32_e32 v0, v0, v0 ; CHECK-NEXT: .LBB0_1: ; %bb1 ; CHECK-NEXT: ; =>This Loop Header: Depth=1