Index: llvm/trunk/include/llvm/CodeGen/LiveInterval.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/LiveInterval.h +++ llvm/trunk/include/llvm/CodeGen/LiveInterval.h @@ -789,8 +789,15 @@ /// L000F, refining for mask L0018. Will split the L00F0 lane into /// L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod /// function will be applied to the L0010 and L0008 subranges. + /// + /// \p Indexes and \p TRI are required to clean up the VNIs that + /// don't defne the related lane masks after they get shrunk. E.g., + /// when L000F gets split into L0007 and L0008 maybe only a subset + /// of the VNIs that defined L000F defines L0007. void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, - std::function Apply); + std::function Apply, + const SlotIndexes &Indexes, + const TargetRegisterInfo &TRI); bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); Index: llvm/trunk/lib/CodeGen/LiveInterval.cpp =================================================================== --- llvm/trunk/lib/CodeGen/LiveInterval.cpp +++ llvm/trunk/lib/CodeGen/LiveInterval.cpp @@ -879,8 +879,53 @@ SubRanges = nullptr; } -void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator, - LaneBitmask LaneMask, std::function Apply) { +/// For each VNI in \p SR, check whether or not that value defines part +/// of the mask describe by \p LaneMask and if not, remove that value +/// from \p SR. +static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, + LaneBitmask LaneMask, + const SlotIndexes &Indexes, + const TargetRegisterInfo &TRI) { + // Phys reg should not be tracked at subreg level. + // Same for noreg (Reg == 0). + if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg) + return; + // Remove the values that don't define those lanes. + SmallVector ToBeRemoved; + for (VNInfo *VNI : SR.valnos) { + if (VNI->isUnused()) + continue; + // PHI definitions don't have MI attached, so there is nothing + // we can use to strip the VNI. + if (VNI->isPHIDef()) + continue; + const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def); + assert(MI && "Cannot find the definition of a value"); + bool hasDef = false; + for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (MOI->getReg() != Reg) + continue; + if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none()) + continue; + hasDef = true; + break; + } + + if (!hasDef) + ToBeRemoved.push_back(VNI); + } + for (VNInfo *VNI : ToBeRemoved) + SR.removeValNo(VNI); + + assert(!SR.empty() && "At least one value should be defined by this mask"); +} + +void LiveInterval::refineSubRanges( + BumpPtrAllocator &Allocator, LaneBitmask LaneMask, + std::function Apply, + const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) { LaneBitmask ToApply = LaneMask; for (SubRange &SR : subranges()) { LaneBitmask SRMask = SR.LaneMask; @@ -898,6 +943,10 @@ SR.LaneMask = SRMask & ~Matching; // Create a new subrange for the matching part MatchingRange = createSubRangeFrom(Allocator, Matching, SR); + // Now that the subrange is split in half, make sure we + // only keep in the subranges the VNIs that touch the related half. + stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI); + stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI); } Apply(*MatchingRange); ToApply &= ~Matching; Index: llvm/trunk/lib/CodeGen/LiveRangeCalc.cpp =================================================================== --- llvm/trunk/lib/CodeGen/LiveRangeCalc.cpp +++ llvm/trunk/lib/CodeGen/LiveRangeCalc.cpp @@ -95,10 +95,11 @@ } LI.refineSubRanges(*Alloc, SubMask, - [&MO, this](LiveInterval::SubRange &SR) { - if (MO.isDef()) - createDeadDef(*Indexes, *Alloc, SR, MO); - }); + [&MO, this](LiveInterval::SubRange &SR) { + if (MO.isDef()) + createDeadDef(*Indexes, *Alloc, SR, MO); + }, + *Indexes, TRI); } // Create the def in the main liverange. We do not have to do this if Index: llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp +++ llvm/trunk/lib/CodeGen/RegisterCoalescer.cpp @@ -924,23 +924,25 @@ } SlotIndex AIdx = CopyIdx.getRegSlot(true); LaneBitmask MaskA; + const SlotIndexes &Indexes = *LIS->getSlotIndexes(); for (LiveInterval::SubRange &SA : IntA.subranges()) { VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); MaskA |= SA.LaneMask; - IntB.refineSubRanges(Allocator, SA.LaneMask, - [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB] - (LiveInterval::SubRange &SR) { - VNInfo *BSubValNo = SR.empty() - ? SR.getNextValue(CopyIdx, Allocator) - : SR.getVNInfoAt(CopyIdx); - assert(BSubValNo != nullptr); - auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); - ShrinkB |= P.second; - if (P.first) - BSubValNo->def = ASubValNo->def; - }); + IntB.refineSubRanges( + Allocator, SA.LaneMask, + [&Allocator, &SA, CopyIdx, ASubValNo, + &ShrinkB](LiveInterval::SubRange &SR) { + VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator) + : SR.getVNInfoAt(CopyIdx); + assert(BSubValNo != nullptr); + auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + ShrinkB |= P.second; + if (P.first) + BSubValNo->def = ASubValNo->def; + }, + Indexes, *TRI); } // Go over all subranges of IntB that have not been covered by IntA, // and delete the segments starting at CopyIdx. This can happen if @@ -3262,16 +3264,18 @@ LaneBitmask LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LI.refineSubRanges(Allocator, LaneMask, - [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) { - if (SR.empty()) { - SR.assign(ToMerge, Allocator); - } else { - // joinSubRegRange() destroys the merged range, so we need a copy. - LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); - } - }); + LI.refineSubRanges( + Allocator, LaneMask, + [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) { + if (SR.empty()) { + SR.assign(ToMerge, Allocator); + } else { + // joinSubRegRange() destroys the merged range, so we need a copy. + LiveRange RangeCopy(ToMerge, Allocator); + joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); + } + }, + *LIS->getSlotIndexes(), *TRI); } bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { Index: llvm/trunk/lib/CodeGen/SplitKit.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SplitKit.cpp +++ llvm/trunk/lib/CodeGen/SplitKit.cpp @@ -520,17 +520,18 @@ .addReg(FromReg, 0, SubIdx); BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + SlotIndexes &Indexes = *LIS.getSlotIndexes(); if (FirstCopy) { - SlotIndexes &Indexes = *LIS.getSlotIndexes(); Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot(); } else { CopyMI->bundleWithPred(); } LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx); DestLI.refineSubRanges(Allocator, LaneMask, - [Def, &Allocator](LiveInterval::SubRange& SR) { - SR.createDeadDef(Def, Allocator); - }); + [Def, &Allocator](LiveInterval::SubRange &SR) { + SR.createDeadDef(Def, Allocator); + }, + Indexes, TRI); return Def; } Index: llvm/trunk/test/CodeGen/SystemZ/regcoal-subranges-update.mir =================================================================== --- llvm/trunk/test/CodeGen/SystemZ/regcoal-subranges-update.mir +++ llvm/trunk/test/CodeGen/SystemZ/regcoal-subranges-update.mir @@ -0,0 +1,94 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple s390x-ibm-linux -mcpu=z13 -systemz-subreg-liveness -verify-machineinstrs -start-before simple-register-coalescing -stop-after greedy -o - %s | FileCheck %s + +# Check that when we split the live-range with several active lanes +# as part of the live-range update, we correctly eliminate the VNI from +# the relevant part. +# +# In this specific test, the register coalescer will: +# 1. Merge %0 with %1, creating a live-range for the full value subreg_l32 + subreg_h32 +# (actually %0 gets merge with %1 via rematerialization, and technically %0 and %1 +# remain two different live-ranges.) +# 2. Merge %2 with %1 triggering a split into the subreg_l32 + subreg_h32 ranges, since +# %2 only touches subreg_l32. As part of the split the subrange covering subreg_h32 +# must contain only the VNI for the high part (i.e., the one tied with the remaaat of %0). +# This used to be broken and trigger a machine verifier error, because we were not +# clearing the dead value w.r.t. lanes when doing the splitting. I.e., we were ending +# with a subrange referring a value that did not define that lane. +# +# PR40835 +--- +name: main +tracksRegLiveness: true +body: | + bb.0: + + ; CHECK-LABEL: name: main + ; CHECK: [[LGHI:%[0-9]+]]:gr64bit = LGHI 43 + ; CHECK: [[LGHI1:%[0-9]+]]:gr64bit = LGHI 43 + ; CHECK: [[LGHI1]].subreg_l32:gr64bit = MSR [[LGHI1]].subreg_l32, [[LGHI1]].subreg_l32 + ; CHECK: [[LGHI1]].subreg_l32:gr64bit = AHIMux [[LGHI1]].subreg_l32, 9, implicit-def dead $cc + ; CHECK: undef %3.subreg_l64:gr128bit = LGFI -245143785, implicit [[LGHI1]].subreg_l32 + ; CHECK: [[DLGR:%[0-9]+]]:gr128bit = DLGR [[DLGR]], [[LGHI]] + ; CHECK: Return implicit [[DLGR]] + %0:gr64bit = LGHI 43 + %1:gr32bit = COPY %0.subreg_l32 + %1:gr32bit = MSR %1, %1 + %2:gr32bit = COPY killed %1 + %2:gr32bit = AHIMux killed %2, 9, implicit-def dead $cc + undef %3.subreg_l64:gr128bit = LGFI -245143785, implicit killed %2 + %3:gr128bit = DLGR %3:gr128bit, killed %0 + Return implicit killed %3 + +... + +# Make sure the compiler does not choke on VNIs that don't +# an explicit MI as definition. +# In that specific example, this is the PHI not explicitly +# represented for the value carried by %7. +--- +name: segfault +tracksRegLiveness: true +liveins: [] +body: | + ; CHECK-LABEL: name: segfault + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[LGHI:%[0-9]+]]:addr64bit = LGHI 0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: ADJCALLSTACKDOWN 0, 0 + ; CHECK: [[LGFR:%[0-9]+]]:gr64bit = LGFR [[LGHI]].subreg_l32 + ; CHECK: $r2d = LGHI 123 + ; CHECK: $r3d = LGHI 0 + ; CHECK: $r4d = LGHI 0 + ; CHECK: $r5d = COPY [[LGFR]] + ; CHECK: KILL killed $r2d, killed $r3d, killed $r4d, $r5d, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc + ; CHECK: ADJCALLSTACKUP 0, 0 + ; CHECK: [[LGHI]]:addr64bit = nuw nsw LA [[LGHI]], 1, $noreg + ; CHECK: J %bb.1 + bb.0: + successors: %bb.1(0x80000000) + + %2:gr64bit = LGHI 0 + %5:gr64bit = LGHI 123 + %7:addr64bit = COPY %2 + + bb.1: + successors: %bb.1(0x80000000) + + %0:addr64bit = COPY killed %7 + ADJCALLSTACKDOWN 0, 0 + %3:gr32bit = COPY %0.subreg_l32 + %4:gr64bit = LGFR killed %3 + $r2d = COPY %5 + $r3d = COPY %2 + $r4d = COPY %2 + $r5d = COPY killed %4 + KILL killed $r2d, killed $r3d, killed $r4d, killed $r5d, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc + ADJCALLSTACKUP 0, 0 + %1:gr64bit = nuw nsw LA killed %0, 1, $noreg + %7:addr64bit = COPY killed %1 + J %bb.1 + +...