Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -1927,6 +1927,28 @@ // Update regalloc hint. TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); + if (!CP.isPhys()) { + // Fix up the case where some subreg was undefined at a use, and thus the + // use was not in the live range, but merging has now made it defined: + // extend each subrange to the non-undef uses that overlap its lane mask. + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + LLVM_DEBUG(dbgs() << "\tBefore extending subranges: " << LI << "\n"); + for (auto &S : LI.subranges()) { + SmallVector Uses; + for (auto &MO : MRI->reg_operands(CP.getDstReg())) { + if (!MO.isUse() || MO.isUndef() || MO.isDebug()) + continue; + auto OperandMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + if ((OperandMask & S.LaneMask).none()) + continue; + Uses.push_back(LIS->getInstructionIndex(*MO.getParent()).getRegSlot()); + } + SmallVector Undefs; + LI.computeSubRangeUndefs(Undefs, S.LaneMask, *MRI, *LIS->getSlotIndexes()); + LIS->extendToIndices(S, Uses, Undefs); + } + } + LLVM_DEBUG({ dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; Index: test/CodeGen/AMDGPU/coalescing-subreg-was-undef-but-became-def.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescing-subreg-was-undef-but-became-def.mir @@ -0,0 +1,183 @@ +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s +# +# This is another example of a test that gave "Couldn't join subrange!" +# +# It shows a case where, at a use of the whole register, one subreg was +# undefined; after coalescing that subreg became defined, but the +# subrange was not updated to reflect that. 
+# +# GCN: {{^body}} + +--- +name: _amdgpu_cs_main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +liveins: +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0: + successors: %bb.21(0x40000000), %bb.1(0x40000000) + + S_CBRANCH_SCC1 %bb.21, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + S_CBRANCH_SCC1 %bb.3, implicit undef $scc + S_BRANCH %bb.2 + + bb.2: + successors: %bb.18(0x80000000) + + %36:vreg_128 = IMPLICIT_DEF + S_BRANCH %bb.18 + + bb.3: + successors: %bb.5(0x40000000), %bb.4(0x40000000) + + S_CBRANCH_SCC1 %bb.5, implicit undef $scc + S_BRANCH %bb.4 + + bb.4: + successors: %bb.5(0x80000000) + + + bb.5: + successors: %bb.7(0x40000000), %bb.6(0x40000000) + + S_CBRANCH_SCC1 %bb.7, implicit undef $scc + S_BRANCH %bb.6 + + bb.6: + successors: %bb.7(0x80000000) + + + bb.7: + successors: %bb.9(0x40000000), %bb.8(0x40000000) + + S_CBRANCH_SCC1 %bb.9, implicit undef $scc + S_BRANCH %bb.8 + + bb.8: + successors: %bb.9(0x80000000) + + + bb.9: + successors: %bb.10(0x40000000), %bb.14(0x40000000) + + S_CBRANCH_SCC1 %bb.14, implicit undef $scc + S_BRANCH %bb.10 + + bb.10: + successors: %bb.12(0x40000000), %bb.11(0x40000000) + + S_CBRANCH_SCC1 %bb.12, implicit undef $scc + S_BRANCH %bb.11 + + bb.11: + successors: %bb.12(0x80000000) + + + bb.12: + successors: %bb.14(0x40000000), %bb.13(0x40000000) + + S_CBRANCH_SCC1 %bb.14, implicit undef $scc + S_BRANCH %bb.13 + + bb.13: + successors: %bb.14(0x80000000) + + + bb.14: + successors: 
%bb.15(0x40000000), %bb.16(0x40000000) + + %6:sreg_32_xm0 = S_MOV_B32 0 + undef %7.sub2:sreg_128 = COPY %6 + S_CBRANCH_SCC0 %bb.16, implicit undef $scc + + bb.15: + successors: %bb.17(0x80000000) + + undef %8.sub0:sreg_128 = COPY killed %6 + %5:sreg_128 = COPY killed %7 + %34:sreg_128 = COPY killed %5 + %35:sreg_128 = COPY killed %8 + S_BRANCH %bb.17 + + bb.16: + successors: %bb.17(0x80000000) + + undef %18.sub0:sreg_128 = COPY %6 + %18.sub1:sreg_128 = COPY %6 + %18.sub2:sreg_128 = COPY killed %6 + %16:sreg_128 = COPY killed %18 + %15:sreg_128 = COPY killed %7 + %34:sreg_128 = COPY killed %15 + %35:sreg_128 = COPY killed %16 + + bb.17: + successors: %bb.18(0x40000000), %bb.22(0x40000000) + + %2:sreg_128 = COPY killed %35 + %1:sreg_128 = COPY killed %34 + %24:sreg_32_xm0 = S_ADD_I32 killed %1.sub2, target-flags(amdgpu-gotprel32-hi) 1, implicit-def dead $scc + S_CMP_LT_I32 killed %24, 8, implicit-def $scc + %30:vreg_128 = COPY %2 + %36:vreg_128 = COPY killed %30 + S_CBRANCH_SCC1 %bb.22, implicit killed $scc + S_BRANCH %bb.18 + + bb.18: + successors: %bb.19(0x30000000), %bb.20(0x50000000) + + %29:vreg_128 = COPY killed %36 + V_CMP_NE_U32_e32 0, killed %29.sub3, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.20, implicit killed $vcc + S_BRANCH %bb.19 + + bb.19: + successors: %bb.20(0x80000000) + + + bb.20: + successors: %bb.21(0x80000000) + + + bb.21: + S_ENDPGM + + bb.22: + successors: %bb.18(0x80000000) + + %31:vreg_128 = COPY killed %2 + %36:vreg_128 = COPY killed %31 + S_BRANCH %bb.18 + +...