Index: include/llvm/CodeGen/LiveInterval.h =================================================================== --- include/llvm/CodeGen/LiveInterval.h +++ include/llvm/CodeGen/LiveInterval.h @@ -790,8 +790,8 @@ /// L000F, refining for mask L0018. Will split the L00F0 lane into /// L00E0 and L0010 and the L000F lane into L0007 and L0008. The Mod /// function will be applied to the L0010 and L0008 subranges. - void refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, - std::function Mod); + bool refineSubRanges(BumpPtrAllocator &Allocator, LaneBitmask LaneMask, + std::function Mod); bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); Index: lib/CodeGen/LiveInterval.cpp =================================================================== --- lib/CodeGen/LiveInterval.cpp +++ lib/CodeGen/LiveInterval.cpp @@ -880,8 +880,8 @@ SubRanges = nullptr; } -void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator, - LaneBitmask LaneMask, std::function Apply) { +bool LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator, + LaneBitmask LaneMask, std::function Apply) { LaneBitmask ToApply = LaneMask; for (SubRange &SR : subranges()) { LaneBitmask SRMask = SR.LaneMask; @@ -900,14 +900,16 @@ // Create a new subrange for the matching part MatchingRange = createSubRangeFrom(Allocator, Matching, SR); } - Apply(*MatchingRange); + if (!Apply(*MatchingRange)) + return false; ToApply &= ~Matching; } // Create a new subrange if there are uncovered bits left. 
if (ToApply.any()) { SubRange *NewRange = createSubRange(Allocator, ToApply); - Apply(*NewRange); + return Apply(*NewRange); } + return true; } unsigned LiveInterval::getSize() const { Index: lib/CodeGen/LiveRangeCalc.cpp =================================================================== --- lib/CodeGen/LiveRangeCalc.cpp +++ lib/CodeGen/LiveRangeCalc.cpp @@ -99,6 +99,7 @@ [&MO, this](LiveInterval::SubRange &SR) { if (MO.isDef()) createDeadDef(*Indexes, *Alloc, SR, MO); + return true; }); } Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -180,12 +180,12 @@ /// LaneMask are split as necessary. @p LaneMask are the lanes that /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange /// lanemasks already adjusted to the coalesced register. - void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, + bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, LaneBitmask LaneMask, CoalescerPair &CP); /// Join the liveranges of two subregisters. Joins @p RRange into /// @p LRange, @p RRange may be invalid afterwards. - void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LaneBitmask LaneMask, const CoalescerPair &CP); /// We found a non-trivially-coalescable copy. 
If the source value number is @@ -847,6 +847,7 @@ : SR.getVNInfoAt(CopyIdx); assert(BSubValNo != nullptr); addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); + return true; }); } } @@ -2920,7 +2921,7 @@ } } -void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, +bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, LaneBitmask LaneMask, const CoalescerPair &CP) { SmallVector NewVNInfo; @@ -2937,12 +2938,14 @@ if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { // We already determined that it is legal to merge the intervals, so this // should never fail. + return false; llvm_unreachable("*** Couldn't join subrange!\n"); } if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) { // We already determined that it is legal to merge the intervals, so this // should never fail. + return false; llvm_unreachable("*** Couldn't join subrange!\n"); } @@ -2966,7 +2969,7 @@ LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return; + return true; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. @@ -2980,22 +2983,24 @@ dbgs() << ": " << LRange << '\n'; }); LIS->extendToIndices(LRange, EndPoints); + return true; } -void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, LaneBitmask LaneMask, CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LI.refineSubRanges(Allocator, LaneMask, + return LI.refineSubRanges(Allocator, LaneMask, [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) { if (SR.empty()) { SR.assign(ToMerge, Allocator); } else { // joinSubRegRange() destroys the merged range, so we need a copy. 
LiveRange RangeCopy(ToMerge, Allocator); - joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); + return joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); } + return true; }); } @@ -3045,25 +3050,36 @@ // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); + bool Abort = false; if (!RHS.hasSubRanges()) { LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(SrcIdx); - mergeSubRangeInto(LHS, RHS, Mask, CP); + if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) + Abort = true; } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - mergeSubRangeInto(LHS, R, Mask, CP); + if (!mergeSubRangeInto(LHS, R, Mask, CP)) { + Abort = true; + break; + } } } - LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + if (Abort) { + LLVM_DEBUG(dbgs() << "\tSubrange join aborted!\n"); + LHS.clearSubRanges(); + RHS.clearSubRanges(); + } else { + LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - // Pruning implicit defs from subranges may result in the main range - // having stale segments. 
+ LHSVals.pruneMainSegments(LHS, ShrinkMainRange); - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } } // The merging algorithm in LiveInterval::join() can't handle conflicting Index: lib/CodeGen/SplitKit.cpp =================================================================== --- lib/CodeGen/SplitKit.cpp +++ lib/CodeGen/SplitKit.cpp @@ -531,6 +531,7 @@ DestLI.refineSubRanges(Allocator, LaneMask, [Def, &Allocator](LiveInterval::SubRange& SR) { SR.createDeadDef(Def, Allocator); + return true; }); return Def; } Index: test/CodeGen/AMDGPU/regcoal-subrange-join-crash.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/regcoal-subrange-join-crash.ll @@ -0,0 +1,216 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs -o - < %s + +; This test causes an unreachable in SubRange Join without the fix + +define void @foo(<3 x float> %arg0) { +entry: + %extractVec1 = shufflevector <3 x float> %arg0, <3 x float> undef, <4 x i32> + %arg0.addr.sroa.0.0 = select i1 undef, <4 x float> %extractVec1, <4 x float> %extractVec1 + %arg0.addr.sroa.0.0.vec.extract356 = shufflevector <4 x float> %arg0.addr.sroa.0.0, <4 x float> undef, <3 x i32> + br i1 undef, label %cleanup283, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + br i1 undef, label %if.end32, label %cleanup283 + +if.end32: ; preds = %lor.lhs.false + %sub = fsub nsz <3 x float> , %arg0.addr.sroa.0.0.vec.extract356 + %var4 = extractelement <3 x float> %sub, i64 2 + br i1 undef, label %mf_lambda.304.exit, label %if.else.i + +if.else.i: ; preds = %if.end32 + br i1 undef, label %mf_lambda.304.exit, label %if.end3.i + +if.end3.i: ; preds = %if.else.i + %call8.i = tail call nsz float @foo1(float undef) + %cmp9.i = fcmp nsz ugt float %var4, 0.000000e+00 + %sub.i = fsub nsz float -0.000000e+00, %call8.i + %v.0.i = select i1 %cmp9.i, float 
%call8.i, float %sub.i + %sub12.i = fadd nsz float %v.0.i, -1.000000e+00 + br label %mf_lambda.304.exit + +mf_lambda.304.exit: ; preds = %if.end32, %if.else.i, %if.end3.i + br i1 undef, label %mf_lambda.304.exit379, label %if.else.i364 + +if.else.i364: ; preds = %mf_lambda.304.exit + br i1 undef, label %mf_lambda.304.exit379, label %if.end3.i377 + +if.end3.i377: ; preds = %if.else.i364 + br label %mf_lambda.304.exit379 + +mf_lambda.304.exit379: ; preds = %mf_lambda.304.exit, %if.else.i364, %if.end3.i377 + br i1 undef, label %if.then68, label %if.else + +if.then68: ; preds = %mf_lambda.304.exit379 + br label %if.end79 + +if.else: ; preds = %mf_lambda.304.exit379 + br label %if.end79 + +if.end79: ; preds = %if.else, %if.then68 + br i1 undef, label %if.then.i, label %fresnel_dielectric_cos.251.exit + +if.then.i: ; preds = %if.end79 + br label %fresnel_dielectric_cos.251.exit + +fresnel_dielectric_cos.251.exit: ; preds = %if.end79, %if.then.i + br i1 undef, label %if.then93, label %fresnel_dielectric_cos.251.exit.if.end115_crit_edge + +fresnel_dielectric_cos.251.exit.if.end115_crit_edge: ; preds = %fresnel_dielectric_cos.251.exit + br label %if.end115 + +if.then93: ; preds = %fresnel_dielectric_cos.251.exit + br i1 undef, label %if.then.i.i, label %interpolate_fresnel_color.253.exit + +if.then.i.i: ; preds = %if.then93 + br label %interpolate_fresnel_color.253.exit + +interpolate_fresnel_color.253.exit: ; preds = %if.then93, %if.then.i.i + br label %if.end115 + +if.end115: ; preds = %fresnel_dielectric_cos.251.exit.if.end115_crit_edge, %interpolate_fresnel_color.253.exit + br i1 undef, label %cleanup252, label %if.end.i.lr.ph + +if.end.i.lr.ph: ; preds = %if.end115 + %extractVec120 = shufflevector <3 x float> %sub, <3 x float> undef, <4 x i32> + br label %if.end.i + +if.end.i: ; preds = %if.end.i.lr.ph, %for.inc + %extractVec126585 = phi <3 x float> [ %sub, %if.end.i.lr.ph ], [ %var67, %for.inc ] + %wr.sroa.0.0560577 = phi <4 x float> [ %extractVec120, %if.end.i.lr.ph 
], [ %extractVec228, %for.inc ] + br i1 undef, label %if.end.i.i, label %if.else.i409 + +if.end.i.i: ; preds = %if.end.i + %mul.i407 = fmul nsz float undef, undef + br label %if.end129.sink.split + +if.else.i409: ; preds = %if.end.i + br i1 undef, label %if.end129, label %if.then7.i + +if.then7.i: ; preds = %if.else.i409 + br i1 undef, label %cleanup252, label %if.end10.i + +if.end10.i: ; preds = %if.then7.i + br i1 undef, label %if.else13.i, label %if.end129.sink.split + +if.else13.i: ; preds = %if.end10.i + br label %if.end129.sink.split + +if.end129.sink.split: ; preds = %if.end10.i, %if.else13.i, %if.end.i.i + br label %if.end129 + +if.end129: ; preds = %if.end129.sink.split, %if.else.i409 + br i1 undef, label %if.then.i.i433, label %lor.lhs.false.i.i + +lor.lhs.false.i.i: ; preds = %if.end129 + br i1 undef, label %if.then.i.i433, label %if.end.i.i436 + +if.then.i.i433: ; preds = %lor.lhs.false.i.i, %if.end129 + br label %mf_sampleP22_11.298.exit.i + +if.end.i.i436: ; preds = %lor.lhs.false.i.i + br i1 undef, label %mf_sampleP22_11.298.exit.i, label %if.end16.i.i + +if.end16.i.i: ; preds = %if.end.i.i436 + br i1 undef, label %mf_sampleP22_11.298.exit.i, label %if.end26.i.i + +if.end26.i.i: ; preds = %if.end16.i.i + br label %mf_sampleP22_11.298.exit.i + +mf_sampleP22_11.298.exit.i: ; preds = %if.end26.i.i, %if.end16.i.i, %if.end.i.i436, %if.then.i.i433 + br i1 undef, label %cond.true.i.i, label %mf_sample_vndf.299.exit + +cond.true.i.i: ; preds = %mf_sampleP22_11.298.exit.i + br label %mf_sample_vndf.299.exit + +mf_sample_vndf.299.exit: ; preds = %mf_sampleP22_11.298.exit.i, %cond.true.i.i + br i1 undef, label %if.end219, label %if.end.i443 + +if.end.i443: ; preds = %mf_sample_vndf.299.exit + %var53 = fsub nsz <4 x float> %arg0.addr.sroa.0.0, %wr.sroa.0.0560577 + %var54 = shufflevector <4 x float> %var53, <4 x float> undef, <3 x i32> + call nsz <3 x float> @foo13(<3 x float> %var54) + br i1 undef, label %mf_eval_phase_glossy.301.exit, label %if.end20.i + 
+if.end20.i: ; preds = %if.end.i443 + br i1 undef, label %mf_eval_phase_glossy.301.exit, label %if.end37.i + +if.end37.i: ; preds = %if.end20.i + br i1 undef, label %if.then43.i, label %if.else.i453 + +if.then43.i: ; preds = %if.end37.i + br label %if.end54.i + +if.else.i453: ; preds = %if.end37.i + br label %if.end54.i + +if.end54.i: ; preds = %if.else.i453, %if.then43.i + br label %mf_eval_phase_glossy.301.exit + +mf_eval_phase_glossy.301.exit: ; preds = %if.end.i443, %if.end20.i, %if.end54.i + br i1 undef, label %if.end191, label %if.end3.i462 + +if.end3.i462: ; preds = %mf_eval_phase_glossy.301.exit + br label %if.end191 + +if.end191: ; preds = %mf_eval_phase_glossy.301.exit, %if.end3.i462 + br i1 undef, label %if.then195, label %cleanup252 + +if.then195: ; preds = %if.end191 + br i1 undef, label %if.then201, label %if.end219 + +if.then201: ; preds = %if.then195 + br i1 undef, label %if.then.i.i478, label %interpolate_fresnel_color.253.exit486 + +if.then.i.i478: ; preds = %if.then201 + br label %interpolate_fresnel_color.253.exit486 + +interpolate_fresnel_color.253.exit486: ; preds = %if.then201, %if.then.i.i478 + br label %if.end219 + +if.end219: ; preds = %mf_sample_vndf.299.exit, %interpolate_fresnel_color.253.exit486, %if.then195 + %var67 = tail call nsz <3 x float> @foo13(<3 x float> %extractVec126585) + %extractVec228 = shufflevector <3 x float> %var67, <3 x float> undef, <4 x i32> + br i1 undef, label %mf_lambda.304.exit508, label %if.else.i493 + +if.else.i493: ; preds = %if.end219 + br i1 undef, label %mf_lambda.304.exit508, label %if.end3.i506 + +if.end3.i506: ; preds = %if.else.i493 + br label %mf_lambda.304.exit508 + +mf_lambda.304.exit508: ; preds = %if.end219, %if.else.i493, %if.end3.i506 + br i1 undef, label %if.end245, label %if.then235 + +if.then235: ; preds = %mf_lambda.304.exit508 + br label %if.end245 + +if.end245: ; preds = %if.then235, %mf_lambda.304.exit508 + br i1 undef, label %for.inc, label %if.end.i514 + +if.end.i514: ; preds = 
%if.end245 + br i1 undef, label %for.inc, label %if.end3.i516 + +if.end3.i516: ; preds = %if.end.i514 + br label %for.inc + +for.inc: ; preds = %if.end245, %if.end.i514, %if.end3.i516 + %cmp.i405 = fcmp nsz ogt float undef, undef + br i1 %cmp.i405, label %cleanup252, label %if.end.i + +cleanup252: ; preds = %if.then7.i, %for.inc, %if.end191, %if.end115 + br i1 undef, label %if.then254, label %if.end265 + +if.then254: ; preds = %cleanup252 + br label %if.end265 + +if.end265: ; preds = %if.then254, %cleanup252 + %extractVec268 = shufflevector <4 x float> undef, <4 x float> undef, <3 x i32> + br label %cleanup283 + +cleanup283: ; preds = %entry, %lor.lhs.false, %if.end265 + ret void +} + +declare float @foo1(float) +declare float @foo12(<2 x float>) +declare <3 x float> @foo13(<3 x float>)