Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -2085,9 +2085,24 @@ LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. - std::pair followCopyChain(const VNInfo *VNI) const; - - bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; + std::tuple + followCopyChain(const VNInfo *VNI, unsigned OtherReg) const; + + /// Determine whether Val0 for Reg and Val1 for Other.Reg are identical. + /// The first element of the returned pair is true if they are, false + /// otherwise. The second element is true if one value is defined + /// directly via the other, e.g.: + /// %reg0 = ... + /// %regx = COPY %reg0 ;; reg1 defined via reg0 + /// %reg1 = COPY %regx ;; val0 == val1 + /// -> { true, true } + /// vs + /// %reg0 = COPY %regy + /// %regx = COPY %regy + /// $reg1 = COPY %regx ;; val0 == val1 + /// -> { true, false } + std::pair valuesIdentical(VNInfo *Val0, VNInfo *Val1, + const JoinVals &Other) const; /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. /// Return a conflict resolution when possible, but leave the hard cases as @@ -2203,19 +2218,22 @@ return L; } -std::pair JoinVals::followCopyChain( - const VNInfo *VNI) const { +std::tuple JoinVals::followCopyChain( + const VNInfo *VNI, unsigned OtherReg) const { unsigned Reg = this->Reg; + bool UsedOtherReg = false; while (!VNI->isPHIDef()) { SlotIndex Def = VNI->def; MachineInstr *MI = Indexes->getInstructionFromIndex(Def); assert(MI && "No defining instruction"); if (!MI->isFullCopy()) - return std::make_pair(VNI, Reg); + return std::make_tuple(VNI, Reg, UsedOtherReg); unsigned SrcReg = MI->getOperand(1).getReg(); + if (SrcReg == OtherReg) + UsedOtherReg = true; if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - return std::make_pair(VNI, Reg); + return std::make_tuple(VNI, Reg, UsedOtherReg); const LiveInterval &LI = LIS->getInterval(SrcReg); const VNInfo *ValueIn; @@ -2241,26 +2259,28 @@ VNI = ValueIn; Reg = SrcReg; } - return std::make_pair(VNI, Reg); + return std::make_tuple(VNI, Reg, UsedOtherReg); } -bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, - const JoinVals &Other) const { +std::pair JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, + const JoinVals &Other) const { const VNInfo *Orig0; unsigned Reg0; - std::tie(Orig0, Reg0) = followCopyChain(Value0); - if (Orig0 == Value1) - return true; + bool Other0; + std::tie(Orig0, Reg0, Other0) = followCopyChain(Value0, Other.Reg); + if (Orig0 == Value1 && Reg0 == Other.Reg) + return std::make_pair(true, Other0); const VNInfo *Orig1; unsigned Reg1; - std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); + bool Other1; + std::tie(Orig1, Reg1, Other1) = Other.followCopyChain(Value1, Reg); // The values are equal if they are defined at the same place and use the // same register. Note that we cannot compare VNInfos directly as some of // them might be from a copy created in mergeSubRangeInto() while the other // is from the original LiveInterval. - return Orig0->def == Orig1->def && Reg0 == Reg1; + return std::make_pair(Orig0->def == Orig1->def && Reg0 == Reg1, Other1); } JoinVals::ConflictResolution @@ -2430,9 +2450,25 @@ // %other = COPY %ext // %this = COPY %ext <-- Erase this copy // - if (DefMI->isFullCopy() && !CP.isPartial() - && valuesIdentical(VNI, V.OtherVNI, Other)) - return CR_Erase; + // Avoid this case when there are subregs, as it can result in an incorrect + // subreg live range. + // + // Another case to be careful about is a case where the travesal of the + // COPY chain for one register encounters a use of the other register, + // e.g. + // 10B %other = COPY ... + // 20B %x = COPY %other + // 30B %this = COPY %x ;; assume that liveness of %this extends to the + // ;; end of the block (then back to the entry phi) + // Coalescing %this and %other can force a gap in the live range between + // 20r and 30r, and if the COPY at 30 is erased, its liveness will no longer + // extend to the end of the block (an IMPLICIT_DEF may incorrectly be added + // later on to create a definition that is live on exit). + if (DefMI->isFullCopy() && !CP.isPartial()) { + std::pair P = valuesIdentical(VNI, V.OtherVNI, Other); + if (P.first) + return P.second ? CR_Merge : CR_Erase; + } // If the lanes written by this instruction were all undef in OtherVNI, it is // still safe to join the live ranges. This can't be done with a simple value @@ -2957,7 +2993,8 @@ LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); - LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); + LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask) << ' ' + << LRange << "\n"); if (EndPoints.empty()) return; Index: test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescing-with-subregs-in-loop-bug.mir @@ -0,0 +1,215 @@ +# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -run-pass=simple-register-coalescing,rename-independent-subregs %s -o - | FileCheck -check-prefix=GCN %s + +# This test is for a bug where the following happens: +# +# Inside the loop, %29.sub2 is used in a V_LSHLREV whose result is then used +# in an LDS read. %29 is a 128 bit value that is linked by copies to +# %45 (from phi elimination), %28 (the value in the loop pre-header), +# %31 (defined and subreg-modified in the loop, and used after the loop) +# and %30: +# +# %45:vreg_128 = COPY killed %28 +# bb.39: +# %29:vreg_128 = COPY killed %45 +# %39:vgpr_32 = V_LSHLREV_B32_e32 2, %29.sub2, implicit $exec +# %31:vreg_128 = COPY killed %29 +# %31.sub1:vreg_128 = COPY %34 +# %30:vreg_128 = COPY %31 +# %45:vreg_128 = COPY killed %30 +# S_CBRANCH_EXECNZ %bb.39, implicit $exec +# S_BRANCH %bb.40 +# bb.40: +# undef %8615.sub0:vreg_128 = COPY killed %31.sub0 +# +# So this coalesces together into a single 128 bit value whose sub1 is modified +# in the loop, but the sub2 used in the V_LSHLREV is not modified in the loop. +# +# The bug is that the coalesced value has a L00000004 subrange (for sub2) that +# says that it is not live up to the end of the loop block. The symptom is that +# Rename Independent Subregs separates sub2 into its own register, and it is +# not live round the loop, so that pass adds an IMPLICIT_DEF for it just before +# the loop backedge. + +# GCN: bb.1 (%ir-block.6): +# GCN: V_LSHLREV_B32_e32 2, [[val:%[0-9][0-9]*]].sub2 +# GCN-NOT: [[val]]:vreg_128 = IMPLICIT_DEF + +--- | + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" + target triple = "amdgcn--amdpal" + + define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <3 x i32>) local_unnamed_addr #0 { + .entry: + br label %6 + + ;