Index: llvm/lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- llvm/lib/CodeGen/RegisterCoalescer.cpp +++ llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -211,6 +211,18 @@ /// live interval update is costly. void lateLiveIntervalUpdate(); + /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange + /// has no value defined in the predecessors. If the incoming value is the + /// same as defined by the copy itself, the value is considered undefined. + bool copyValueUndefInPredecessors(LiveRange &S, + const MachineBasicBlock *MBB, + LiveQueryResult SLRQ); + + /// Set necessary undef flags on subregister uses after pruning out undef + /// lane segments from the subrange. + void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg, + LaneBitmask PrunedLanes); + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the /// src/dst of the copy instruction CopyMI. This returns true if the copy /// was successfully coalesced away. If it is not currently possible to @@ -1809,6 +1821,49 @@ return false; } +bool RegisterCoalescer::copyValueUndefInPredecessors( + LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) { + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + SlotIndex PredEnd = LIS->getMBBEndIdx(Pred); + if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) { + // If this is a self loop, we may be reading the same value. + if (V->id != SLRQ.valueOutOrDead()->id) + return false; + } + } + + return true; +} + +void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, + Register Reg, + LaneBitmask PrunedLanes) { + // If we had other instructions in the segment reading the undef sublane + // value, we need to mark them with undef. 
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + unsigned SubRegIdx = MO.getSubReg(); + if (SubRegIdx == 0 || MO.isUndef()) + continue; + + LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx); + SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent()); + for (LiveInterval::SubRange &S : LI.subranges()) { + if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) { + MO.setIsUndef(); + break; + } + } + } + + LI.removeEmptySubRanges(); + + // A def of a subregister may be a use of other register lanes. Replacing + // such a def with a def of a different register will eliminate the use, + // and may cause the recorded live range to be larger than the actual + // liveness in the program IR. + LIS->shrinkToUses(&LI); +} + bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); @@ -1868,16 +1923,35 @@ VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no flag."); assert(ReadVNI != DefVNI && "Cannot read and define the same value."); - LI.MergeValueNumberInto(DefVNI, ReadVNI); + + // Track incoming undef lanes we need to eliminate from the subrange. + LaneBitmask PrunedLanes; + MachineBasicBlock *MBB = CopyMI->getParent(); // Process subregister liveranges. for (LiveInterval::SubRange &S : LI.subranges()) { LiveQueryResult SLRQ = S.Query(CopyIdx); if (VNInfo *SDefVNI = SLRQ.valueDefined()) { - VNInfo *SReadVNI = SLRQ.valueIn(); - S.MergeValueNumberInto(SDefVNI, SReadVNI); + if (VNInfo *SReadVNI = SLRQ.valueIn()) + SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI); + + // If this copy introduced an undef subrange from an incoming value, + // we need to eliminate the undef live in values from the subrange. 
+ if (copyValueUndefInPredecessors(S, MBB, SLRQ)) { + LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n"); + PrunedLanes |= S.LaneMask; + S.removeValNo(SDefVNI); + } } } + + LI.MergeValueNumberInto(DefVNI, ReadVNI); + if (PrunedLanes.any()) { + LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: " + << PrunedLanes << '\n'); + setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes); + } + LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); } deleteInstr(CopyMI); Index: llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/coalesce-identity-copies-undef-subregs.mir @@ -0,0 +1,338 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-coalescing -verify-machineinstrs -start-before=simple-register-coalescing -stop-after=machine-scheduler -o - %s | FileCheck %s + +# Tests that break due to the handling of partially undef registers +# when whole register identity copies are erased. + +# Make sure there is no verifier error after +# RenameIndependentSubregs processes this. The coalescer would +# remove the identity copy in %bb.1, and leave behind a dummy interval +# across bb.1 with no corresponding value anywhere in the function. 
+ +--- +name: identity_copy_undef_subrange +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: identity_copy_undef_subrange + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + successors: %bb.2, %bb.1 + + %0:vreg_64 = COPY %0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec + S_BRANCH %bb.1 + +... + +# Test another use of the register inside the block. +--- +name: identity_copy_undef_subrange_other_uses0 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: identity_copy_undef_subrange_other_uses0 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: S_NOP 0, implicit undef %0.sub0 + ; CHECK: S_NOP 0, implicit undef %0.sub0 + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $vgpr0 + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + successors: %bb.2, %bb.1 + + %0:vreg_64 = COPY %0 + S_NOP 0, implicit %0.sub0 + S_NOP 0, implicit %0.sub0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 
killed %0.sub1, implicit $mode, implicit $exec + S_BRANCH %bb.1 + +... + +--- +name: second_identity_copy +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: second_identity_copy + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, %0.sub1, implicit $mode, implicit $exec + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + successors: %bb.2, %bb.1 + + %0:vreg_64 = COPY killed %0 + %0:vreg_64 = COPY %0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + undef %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, killed %0.sub1, implicit $mode, implicit $exec + S_BRANCH %bb.1 + +... + +--- +name: second_identity_copy_undef_lane_outblock +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: second_identity_copy_undef_lane_outblock + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, %0.sub1, implicit $mode, implicit $exec + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + successors: %bb.2, %bb.1 + + %0:vreg_64 = COPY killed %0 + %0:vreg_64 = COPY %0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %0.sub1:vreg_64 = nofpexcept V_MUL_F32_e32 0, killed %0.sub1, implicit $mode, implicit $exec + S_BRANCH %bb.1 + +... 
+ +# The same value number appears in multiple blocks +--- +name: identity_copy_undef_subrange_null_vninfo_to_remove +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: identity_copy_undef_subrange_null_vninfo_to_remove + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.3, implicit $exec + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: S_NOP 0, implicit undef %0.sub0 + ; CHECK: undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec + ; CHECK: S_BRANCH %bb.1 + ; CHECK: bb.3: + ; CHECK: S_NOP 0, implicit undef %0.sub0 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + %0:vreg_64 = COPY %0 + S_CBRANCH_EXECNZ %bb.3, implicit $exec + + bb.2: + S_NOP 0, implicit %0.sub0 + undef %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec + S_BRANCH %bb.1 + + bb.3: + S_NOP 0, implicit %0.sub0 + +... + +--- +name: undef_copy_self_loop0 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: undef_copy_self_loop0 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: bb.2: + ; CHECK: S_NOP 0, implicit undef %0.sub0 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + %0:vreg_64 = COPY %0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + + bb.2: + S_NOP 0, implicit %0.sub0 + +... 
+ +--- +name: undef_copy_self_loop1 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: undef_copy_self_loop1 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: bb.2: + ; CHECK: S_NOP 0, implicit %0.sub1 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + %0:vreg_64 = COPY %0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + + bb.2: + S_NOP 0, implicit %0.sub1 + +... + +# FIXME: This testcase is broken +# The coalescing of the %0 = %2 COPY in %bb.2 needs to prune the dead +# phi range across %bb.1 after it is erased. +# --- +# name: prune_subrange_phi_value_0 +# tracksRegLiveness: true +# body: | +# bb.0: +# liveins: $vgpr0 + +# undef %0.sub1:vreg_64 = COPY killed $vgpr0 + +# bb.1: +# successors: %bb.2, %bb.1 + +# %1:vreg_64 = COPY killed %0 +# %0:vreg_64 = COPY %1 +# S_CBRANCH_EXECNZ %bb.1, implicit $exec +# S_BRANCH %bb.2 + +# bb.2: +# undef %2.sub1:vreg_64 = COPY %0.sub1 +# %0:vreg_64 = COPY killed %2 +# S_BRANCH %bb.1 + +# ... + +# Variant of testcase that asserts since there wasn't already an +# incoming segment at the erased copy, and no valid end point. +# --- +# name: prune_subrange_phi_value_1 +# tracksRegLiveness: true +# body: | +# bb.0: +# liveins: $vgpr0 + +# undef %0.sub1:vreg_64 = COPY killed $vgpr0 + +# bb.1: +# successors: %bb.2, %bb.1 + +# %0:vreg_64 = COPY killed %0 +# S_CBRANCH_EXECNZ %bb.1, implicit $exec +# S_BRANCH %bb.2 + +# bb.2: +# undef %1.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 killed %0.sub1, implicit $mode, implicit $exec +# %0:vreg_64 = COPY killed %1 +# S_BRANCH %bb.1 + +# ... 
+ +--- +name: prune_subrange_phi_value_2 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: prune_subrange_phi_value_2 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $vgpr0 + ; CHECK: undef %0.sub1:vreg_64 = COPY $vgpr0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $vgpr0 + + undef %0.sub1:vreg_64 = COPY killed $vgpr0 + + bb.1: + successors: %bb.2, %bb.1 + + %0:vreg_64 = COPY killed %0 + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.2 + + bb.2: + %0.sub1:vreg_64 = nofpexcept V_CEIL_F32_e32 %0.sub1, implicit $mode, implicit $exec + S_BRANCH %bb.1 + +...