Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -2339,8 +2339,41 @@ continue; DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i << '@' << LR.getValNumInfo(i)->def << '\n'); - if (SubRangeJoin) - return false; + if (SubRangeJoin) { + // Unresolved subrange conflicts may remain here as a result of merging + // registers where subregister definitions of one register are + // overwritten by corresponding definitions of the other register. + // For example, in this case, when coalescing vreg140 into vreg131: + // 704B %vreg140 = ... + // 720B %vreg131 = COPY %vreg140 + // 736B %vreg131:sub1 = COPY %vreg48 + // 768B %vreg131:sub2 = COPY %vreg23 + // 800B %vreg131:sub3 = COPY %vreg24 + // 832B %vreg131:sub4 = COPY %vreg25 + // 864B %vreg131:sub5 = COPY %vreg26 + // 896B %vreg131:sub6 = COPY %vreg40 + // 928B %vreg131:sub7 = COPY %vreg42 + // 960B %vreg131:sub8 = COPY %vreg44 + // 976B ... = use %vreg131 + // 1008B %vreg140:sub1 = COPY %vreg54 + // 1040B %vreg140:sub2 = COPY %vreg23 + // 1072B %vreg140:sub3 = COPY %vreg24 + // 1104B %vreg140:sub4 = COPY %vreg25 + // 1136B %vreg140:sub5 = COPY %vreg26 + // 1168B %vreg140:sub6 = COPY %vreg50 + // 1200B %vreg140:sub7 = COPY %vreg51 + // 1232B %vreg140:sub8 = COPY %vreg52 + // 1248B ... = use %vreg140 + // The conflict resolution for the main live ranges of both registers + // can determine that the coalescing is legal and may proceed, but the + // subregister ranges will still contain conflicts, since the subranges + // will overlap between the two virtual registers. Since repeating + // the resolution code below for subregisters could only result in + // CR_Replace, and the legality has already been determined, assume + // the resolution to be CR_Replace without repeating the work. 
+ V.Resolution = CR_Replace; + continue; + } ++NumLaneConflicts; assert(V.OtherVNI && "Inconsistent conflict resolution."); @@ -2700,6 +2733,7 @@ dbgs() << ": " << LRange << '\n'; }); LIS->extendToIndices(LRange, EndPoints); + ShrinkMask |= LaneMask; } void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, Index: test/CodeGen/AMDGPU/coalescer-subreg-join.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -0,0 +1,75 @@ +# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck %s +# Check that %11 and %20 have been coalesced. +# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG:[0-9]+]] +# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG]] + +--- +name: main +alignment: 0 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: sreg_256 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_256 } + - { id: 6, class: sreg_128 } + - { id: 7, class: sreg_512 } + - { id: 9, class: vreg_512 } + - { id: 11, class: vreg_512 } + - { id: 18, class: vgpr_32 } + - { id: 20, class: vreg_512 } + - { id: 27, class: vgpr_32 } +liveins: + - { reg: '%sgpr2_sgpr3', virtual-reg: '%0' } + - { reg: '%vgpr2', virtual-reg: '%1' } + - { reg: '%vgpr3', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0: + liveins: %sgpr2_sgpr3, %vgpr2, %vgpr3 + + %0 = COPY %sgpr2_sgpr3 + %1 = COPY %vgpr2 + %2 = COPY %vgpr3 + %3 = S_LOAD_DWORDX8_IMM %0, 0 + %4 = S_LOAD_DWORDX4_IMM %0, 12 + %5 = S_LOAD_DWORDX8_IMM %0, 16 + %6 = S_LOAD_DWORDX4_IMM %0, 28 + undef %7.sub0 = S_MOV_B32 212739 + %20 = COPY %7 + %11 = COPY %20 + 
%11.sub1 = COPY %1 + %11.sub2 = COPY %1 + %11.sub3 = COPY %1 + %11.sub4 = COPY %1 + %11.sub5 = COPY %1 + %11.sub6 = COPY %1 + %11.sub7 = COPY %1 + %11.sub8 = COPY %1 + dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit %exec + %20.sub1 = COPY %2 + %20.sub2 = COPY %2 + %20.sub3 = COPY %2 + %20.sub4 = COPY %2 + %20.sub5 = COPY %2 + %20.sub6 = COPY %2 + %20.sub7 = COPY %2 + %20.sub8 = COPY %2 + dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit %exec + +...