Index: llvm/lib/CodeGen/SplitKit.h =================================================================== --- llvm/lib/CodeGen/SplitKit.h +++ llvm/lib/CodeGen/SplitKit.h @@ -355,10 +355,10 @@ } /// Find a subrange corresponding to the exact lane mask @p LM in the live - /// interval @p LI. The interval @p LI is assumed to contain such a subrange. - /// This function is used to find corresponding subranges between the - /// original interval and the new intervals. - LiveInterval::SubRange &getSubRangeForMaskExact(LaneBitmask LM, + /// interval @p LI. If the interval @p LI does not contain such a + /// subrange, returns nullptr. This function is used to find corresponding + /// subranges between the original interval and the new intervals. + LiveInterval::SubRange *getSubRangeForMaskExact(LaneBitmask LM, LiveInterval &LI); /// Find a subrange corresponding to the lane mask @p LM, or a superset of it, Index: llvm/lib/CodeGen/SplitKit.cpp =================================================================== --- llvm/lib/CodeGen/SplitKit.cpp +++ llvm/lib/CodeGen/SplitKit.cpp @@ -406,12 +406,14 @@ } #endif -LiveInterval::SubRange &SplitEditor::getSubRangeForMaskExact(LaneBitmask LM, +LiveInterval::SubRange *SplitEditor::getSubRangeForMaskExact(LaneBitmask LM, LiveInterval &LI) { - for (LiveInterval::SubRange &S : LI.subranges()) + for (LiveInterval::SubRange &S : LI.subranges()) { if (S.LaneMask == LM) - return S; - llvm_unreachable("SubRange for this mask not found"); + return &S; + } + + return nullptr; } LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, @@ -1256,7 +1258,7 @@ LiveInterval &PLI = Edit->getParent(); // Need the cast because the inputs to ?: would otherwise be deemed // "incompatible": SubRange vs LiveInterval. - LiveRange &PSR = !LM.all() ? getSubRangeForMaskExact(LM, PLI) + LiveRange &PSR = !LM.all() ? 
*getSubRangeForMaskExact(LM, PLI) : static_cast<LiveRange &>(PLI); if (PSR.liveAt(LastUse)) LIC.extend(LR, End, /*PhysReg=*/0, Undefs); @@ -1292,8 +1294,9 @@ continue; unsigned RegIdx = RegAssign.lookup(V->def); LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); - LiveInterval::SubRange &S = getSubRangeForMaskExact(PS.LaneMask, LI); - if (removeDeadSegment(V->def, S)) + + LiveInterval::SubRange *S = getSubRangeForMaskExact(PS.LaneMask, LI); + if (!S || removeDeadSegment(V->def, *S)) continue; MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); @@ -1301,7 +1304,7 @@ &LIS.getVNInfoAllocator()); Undefs.clear(); LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes()); - extendPHIRange(B, SubLIC, S, PS.LaneMask, Undefs); + extendPHIRange(B, SubLIC, *S, PS.LaneMask, Undefs); } } } Index: llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir @@ -0,0 +1,72 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s + +# Initially %2 starts out with 2 subranges (one for sub0, and one for +# the rest of the lanes). After %2 is split, after refineSubRanges the +# newly created register has a different set of lane masks since the +# copy bundle uses 2 different defs to cover the register. 
+ +--- +name: subrange_for_this_mask_not_found +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + occupancy: 7 +body: | + ; CHECK-LABEL: name: subrange_for_this_mask_not_found + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: SI_SPILL_V1024_SAVE [[DEF1]], %stack.0, $sgpr32, 0, implicit $exec :: (store 128 into %stack.0, align 4, addrspace 5) + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_NOP 0, implicit [[DEF1]] + ; CHECK: S_NOP 0, implicit [[DEF1]] + ; CHECK: [[DEF2:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: [[SI_SPILL_V1024_RESTORE:%[0-9]+]]:vreg_1024_align2 = SI_SPILL_V1024_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) + ; CHECK: undef %5.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:vreg_1024_align2 = COPY [[SI_SPILL_V1024_RESTORE]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 { + ; CHECK: internal %5.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31:vreg_1024_align2 = COPY [[SI_SPILL_V1024_RESTORE]].sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + ; CHECK: } + ; CHECK: %5.sub0:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: S_NOP 0, implicit %5.sub0 + ; CHECK: bb.3: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: S_NOP 0, implicit %5 + ; CHECK: bb.4: + ; CHECK: successors: %bb.3(0x40000000), %bb.5(0x40000000) + ; CHECK: [[DEF2:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF + ; CHECK: S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc + ; CHECK: bb.5: + ; CHECK: undef 
%3.sub0:vreg_1024_align2 = COPY [[DEF]] + ; CHECK: S_NOP 0, implicit %3 + bb.0: + %0:vgpr_32 = IMPLICIT_DEF + %1:vreg_1024_align2 = IMPLICIT_DEF + %2:vreg_1024_align2 = COPY %1 + + bb.1: + S_NOP 0, implicit %1 + S_NOP 0, implicit %1 + %1:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc + + bb.3: + %2.sub0:vreg_1024_align2 = IMPLICIT_DEF + S_NOP 0, implicit %2.sub0 + + bb.4: + S_NOP 0, implicit %2 + + bb.5: + %2:vreg_1024_align2 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.4, implicit undef $vcc + + bb.6: + undef %4.sub0:vreg_1024_align2 = COPY %0 + S_NOP 0, implicit %4 +...