diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -184,6 +184,31 @@
   if (isDefBetween(*TRI, LIS, CCReg, *Sel, *And))
     return false;
 
+  // Cannot safely mirror live intervals with PHI nodes, so check for these
+  // before optimization.
+  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
+  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
+
+  LiveInterval *CmpLI =
+      CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
+  LiveInterval *SelLI =
+      SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
+
+  if (SelLI) {
+    if (llvm::any_of(SelLI->vnis(),
+                     [&](const VNInfo *VNI) {
+                       return VNI->isPHIDef();
+                     }))
+      return false;
+  }
+  if (CmpLI) {
+    if (llvm::any_of(CmpLI->vnis(),
+                     [&](const VNInfo *VNI) {
+                       return VNI->isPHIDef();
+                     }))
+      return false;
+  }
+
   // TODO: Guard against implicit def operands?
   LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
                     << *And);
@@ -204,31 +229,36 @@
   LLVM_DEBUG(dbgs() << "=>\n\t" << *Andn2 << '\n');
 
-  SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
-  SlotIndex SelIdx = LIS->getInstructionIndex(*Sel);
-
-  LiveInterval *CmpLI =
-      CmpReg.isVirtual() ? &LIS->getInterval(CmpReg) : nullptr;
-  LiveInterval *SelLI =
-      SelReg.isVirtual() ? &LIS->getInterval(SelReg) : nullptr;
-
   // Update live intervals for CCReg before potentially removing CmpReg/SelReg,
   // and their associated liveness information.
   if (CCReg.isVirtual()) {
-    // Note: this ignores that SelLI might have multiple internal values
-    // or splits and simply extends the live range to cover all cases
-    // where the result of the v_cndmask_b32 was live (e.g. loops).
-    // This could yield worse register allocation in rare edge cases.
-    SlotIndex EndIdx = AndIdx.getRegSlot();
-    if (SelLI && SelLI->endIndex() > EndIdx && SelLI->endIndex().isBlock())
-      EndIdx = SelLI->endIndex();
+    // Apply live ranges from SelLI to CCReg potentially matching splits
+    // and extending to loop boundaries.
+
+    auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
+      if (SelLI) {
+        // Copy live ranges from SelLI, adjusting start and end as required
+        for (auto I = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
+             I != SelLI->end(); ++I) {
+          SlotIndex Start = I->start < SelIdx.getRegSlot() ?
+                            SelIdx.getRegSlot() : I->start;
+          SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
+                          I->end : AndIdx.getRegSlot();
+          Dst.addSegment(LiveRange::Segment(Start, End, VNI));
+        }
+        // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
+        if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
+          Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(),
+                                            AndIdx.getRegSlot(), VNI));
+      } else {
+        Dst.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
+                                          AndIdx.getRegSlot(), VNI));
+      }
+    };
 
     LiveInterval &CCLI = LIS->getInterval(CCReg);
     auto CCQ = CCLI.Query(SelIdx.getRegSlot());
-    if (CCQ.valueIn()) {
-      CCLI.addSegment(LiveRange::Segment(SelIdx.getRegSlot(),
-                                         EndIdx, CCQ.valueIn()));
-    }
+    if (CCQ.valueIn())
+      applyLiveRanges(CCLI, CCQ.valueIn());
 
     if (CC->getSubReg()) {
       LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
@@ -237,10 +267,8 @@
           Allocator, Mask,
           [=](LiveInterval::SubRange &SR) {
             auto CCQS = SR.Query(SelIdx.getRegSlot());
-            if (CCQS.valueIn()) {
-              SR.addSegment(LiveRange::Segment(
-                  SelIdx.getRegSlot(), EndIdx, CCQS.valueIn()));
-            }
+            if (CCQS.valueIn())
+              applyLiveRanges(SR, CCQS.valueIn());
           },
           *LIS->getSlotIndexes(), *TRI);
       CCLI.removeEmptySubRanges();
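
For review context, the segment-clamping rule that the new applyLiveRanges lambda
implements can be summarized outside of LLVM. Below is a minimal standalone sketch
assuming plain integers stand in for slot indexes; Segment, copyClamped, and the
test values in main are illustrative inventions, not the real llvm::LiveRange API.

// Standalone sketch of the clamping rule applied by applyLiveRanges above.
// Segment, copyClamped, and the integer "slot indexes" are stand-ins for
// llvm::LiveRange::Segment and llvm::SlotIndex, simplified for illustration.
#include <algorithm>
#include <cassert>
#include <vector>

struct Segment {
  int Start, End;  // half-open range [Start, End)
  bool EndIsBlock; // true if End falls on a basic-block boundary
};

// Mirror Sel's live segments into a destination range: clamp the first
// segment's start up to SelIdx (the v_cndmask_b32 def) and clamp any segment
// running past AndIdx (the s_and_b64 def) down to AndIdx, except when the
// segment ends on a block boundary -- the loop case that the old single
// EndIdx heuristic over-approximated -- which is kept as-is. For simplicity
// this walks all segments rather than starting at FindSegmentContaining.
std::vector<Segment> copyClamped(const std::vector<Segment> &SelSegments,
                                 int SelIdx, int AndIdx) {
  std::vector<Segment> Dst;
  for (const Segment &S : SelSegments) {
    if (S.End <= SelIdx)
      continue; // entirely before the Sel def; nothing to mirror
    int Start = std::max(S.Start, SelIdx);
    int End = (S.End < AndIdx || S.EndIsBlock) ? S.End : AndIdx;
    Dst.push_back({Start, End, S.EndIsBlock});
  }
  return Dst;
}

int main() {
  // Loop case: Sel defined at 10, live across a block boundary at 30, And at
  // 20 -- the segment is kept whole rather than being cut at the And.
  auto Loop = copyClamped({{10, 30, /*EndIsBlock=*/true}}, 10, 20);
  assert(Loop.size() == 1 && Loop[0].End == 30);

  // Straight-line case: a segment live past the And with no intervening
  // block boundary is clamped to end at the And.
  auto Straight = copyClamped({{10, 40, /*EndIsBlock=*/false}}, 10, 20);
  assert(Straight.size() == 1 && Straight[0].End == 20);
  return 0;
}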