Index: lib/CodeGen/LiveRangeCalc.cpp =================================================================== --- lib/CodeGen/LiveRangeCalc.cpp +++ lib/CodeGen/LiveRangeCalc.cpp @@ -391,7 +391,8 @@ if (TheVNI && TheVNI != VNI) UniqueVNI = false; TheVNI = VNI; - } + } else + FoundUndef = true; continue; } Index: test/CodeGen/AMDGPU/regcoalescer-assert-from-incorrect-subrange-extension.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/regcoalescer-assert-from-incorrect-subrange-extension.mir @@ -0,0 +1,161 @@ +# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck --check-prefix=GCN %s +# REQUIRES: asserts +# +# With the fix in D49097 to extend subranges after a coalesce in +# RegisterCoalescing, but without the LiveRangeCalc fix that accompanies this +# test, a subrange extension goes wrong, provoking a "Couldn't join +# subrange!" failure in a later coalesce. +# +# What happens is that RegisterCoalescing calls extendToIndices on L00000001 of +# %89 [352r,672r:0)[672r,736B:1)[736B,816B:0)[816B,976r:2) 0@352r 1@672r 2@816B-phi +# L00000004 [352r,976r:0) 0@352r +# L00000001 [672r,688r:0) 0@672r +# with uses 688r 768r 976r and undef 352r (i.e. a point where the main register +# is defined, but this subrange is undef). 
+# The result it gets is +# L00000001 [672r,976r:0) 0@672r +# which is wrong, because that value is undefined in [736,816), and there +# should be a phi at 816: +# L00000001 [672r,736B:0)[816B,976r:1) 0@672r 1@816B-phi +# +# GCN: name: _amdgpu_vs_main + +--- +name: _amdgpu_vs_main +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +liveins: + - { reg: '$sgpr0', virtual-reg: '%12' } + - { reg: '$vgpr0', virtual-reg: '%13' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 4294967295 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: +stack: +constants: +body: | + bb.0: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $sgpr0, $vgpr0 + + %13:vgpr_32 = COPY killed $vgpr0 + %12:sgpr_32 = COPY killed $sgpr0 + %64:vgpr_32 = V_ADD_I32_e32 killed %12, killed %13, implicit-def dead $vcc, implicit $exec + %17:sreg_32_xm0 = S_MOV_B32 0 + %18:vgpr_32 = TBUFFER_LOAD_FORMAT_X_IDXEN killed %64, undef %20:sreg_128, 0, 28, 116, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4) + dead %65:vgpr_32 = V_BFE_U32 killed %18, 28, 1, implicit $exec + S_CBRANCH_SCC0 %bb.2, implicit undef $scc + + bb.1: + successors: %bb.3(0x80000000) + + %90:vreg_128 = IMPLICIT_DEF + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3(0x80000000) + + undef %32.sub0:sreg_128 = COPY %17 + %32.sub1:sreg_128 = COPY %17 + %32.sub2:sreg_128 = COPY %17 + %32.sub3:sreg_128 = COPY killed %17 + %30:sreg_128 = COPY killed %32 + %71:vreg_128 = COPY killed %30 + %90:vreg_128 = COPY killed %71 + + bb.3: + successors: %bb.5(0x40000000), %bb.4(0x40000000) + + %70:vreg_128 = COPY killed 
%90 + %33:sreg_128 = S_BUFFER_LOAD_DWORDX4_IMM undef %34:sreg_128, 16, 0 :: (dereferenceable invariant load 16) + %2:vgpr_32 = V_ADD_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %33.sub3, 0, 0, implicit $exec + S_CBRANCH_SCC1 %bb.5, implicit undef $scc + S_BRANCH %bb.4 + + bb.4: + successors: %bb.5(0x80000000) + + + bb.5: + successors: %bb.6(0x40000000), %bb.9(0x40000000) + + undef %77.sub2:vreg_128 = COPY killed %2 + %55:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %70.sub0, implicit $exec + %91:vreg_128 = IMPLICIT_DEF + %93:sreg_64 = COPY $exec, implicit-def $exec + %94:sreg_64 = S_AND_B64 %93, %55, implicit-def dead $scc + %5:sreg_64 = S_XOR_B64 %94, %93, implicit-def dead $scc + $exec = S_MOV_B64_term killed %94 + SI_MASK_BRANCH %bb.9, implicit $exec + S_BRANCH %bb.6 + + bb.6: + successors: %bb.8(0x40000000), %bb.7(0x40000000) + + S_CBRANCH_SCC1 %bb.8, implicit undef $scc + S_BRANCH %bb.7 + + bb.7: + successors: %bb.9(0x80000000) + + dead %56:sreg_32_xm0 = S_MOV_B32 0 + %89:vreg_128 = COPY %77 + %89.sub0:vreg_128 = COPY undef %56 + %88:vreg_128 = COPY killed %89 + %91:vreg_128 = COPY killed %88 + S_BRANCH %bb.9 + + bb.8: + successors: %bb.9(0x80000000) + + dead %57:sreg_32_xm0 = S_MOV_B32 0 + %85:vreg_128 = COPY %77 + %85.sub0:vreg_128 = COPY undef %57 + %91:vreg_128 = COPY killed %85 + + bb.9: + successors: %bb.10(0x40000000), %bb.11(0x40000000) + + %95:sreg_64 = COPY killed %5 + %9:sreg_64 = S_OR_SAVEEXEC_B64 %95, implicit-def $exec, implicit-def $scc, implicit $exec + %86:vreg_128 = COPY killed %91 + %92:vreg_128 = COPY killed %86 + $exec = S_XOR_B64_term $exec, %9, implicit-def $scc + SI_MASK_BRANCH %bb.11, implicit $exec + S_BRANCH %bb.10 + + bb.10: + successors: %bb.11(0x80000000) + + dead %58:sreg_32_xm0 = S_MOV_B32 0 + %81:vreg_128 = COPY killed %77 + %81.sub0:vreg_128 = COPY undef %58 + %92:vreg_128 = COPY killed %81 + + bb.11: + $exec = S_OR_B64 $exec, killed %9, implicit-def $scc + %82:vreg_128 = COPY killed %92 + EXP 
39, undef %60:vgpr_32, killed %82.sub2, undef %62:vgpr_32, undef %63:vgpr_32, 0, 0, 15, implicit $exec + S_ENDPGM + +...