Index: llvm/lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- llvm/lib/CodeGen/RegisterCoalescer.cpp +++ llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1641,18 +1641,20 @@ SlotIndex RegIndex = Idx.getRegSlot(); LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex); assert(Seg != nullptr && "No segment for defining instruction"); - if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) { - if (V->isPHIDef()) { - CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { - MachineOperand &MO = CopyMI->getOperand(i-1); - if (MO.isReg() && MO.isUse()) - CopyMI->removeOperand(i-1); - } - LLVM_DEBUG(dbgs() << "\tReplaced copy of value with an " - "implicit def\n"); - return CopyMI; + VNInfo *V = DstLI.getVNInfoAt(Seg->end); + + // The source interval may also have been on an undef use, in which case the + // copy introduced a live value. + if ((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx) && !SrcLI.liveAt(Idx))) { + CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { + MachineOperand &MO = CopyMI->getOperand(i-1); + if (MO.isReg() && MO.isUse()) + CopyMI->removeOperand(i-1); } + LLVM_DEBUG(dbgs() << "\tReplaced copy of value with an " + "implicit def\n"); + return CopyMI; } // Remove any DstReg segments starting at the instruction. Index: llvm/test/CodeGen/AMDGPU/coalesce-liveout-undef-copy.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/coalesce-liveout-undef-copy.mir @@ -0,0 +1,61 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-coalescing -run-pass=simple-register-coalescing -o - %s | FileCheck %s + +# %2 has an undef read in %bb.3, and this IR wouldn't be valid if it +# was a real read. After merging %2 into %0, we need to replace the +# copy of undef with an implicit_def since the copy introduced a new +# value. + +--- +name: coalesce_into_undef_copy +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: coalesce_into_undef_copy + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY undef %1:sgpr_128, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead %2:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY]].sub0:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead %4:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 [[COPY]], undef %5:sgpr_32, 11, implicit-def $m0, implicit $m0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + bb.0: + %0:vreg_128_align2 = COPY undef %1:sgpr_128, implicit $exec + S_BRANCH %bb.1 + + bb.1: + %2:vreg_128_align2 = IMPLICIT_DEF + %3:vreg_128_align2 = COPY killed %0 + %3.sub0:vreg_128_align2 = IMPLICIT_DEF + + bb.2: + dead %5:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V4 %3, undef %6:sgpr_32, 11, implicit-def $m0, implicit $m0, implicit $exec + S_BRANCH %bb.2 + + bb.3: + %0:vreg_128_align2 = COPY undef %2 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + + bb.4: + +... Index: llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir +++ llvm/test/CodeGen/AMDGPU/undef-subreg-use-after-coalesce.mir @@ -32,7 +32,8 @@ ; CHECK-LABEL: name: undef_subreg_use_after_full_copy_coalesce_composed ; CHECK: undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: dead %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0, implicit undef %2.sub1:vreg_64 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: S_ENDPGM 0, implicit [[DEF]].sub1 undef %0.sub0:vreg_128 = V_MOV_B32_e32 0, implicit $exec %0.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec %1:vreg_128 = COPY killed %0