Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -3032,7 +3032,9 @@ // If a subrange starts at the copy then an undefined value has been // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); - if (ValueOut != nullptr && Q.valueIn() == nullptr) { + if (ValueOut != nullptr && (Q.valueIn() == nullptr || + (V.Identical && V.Resolution == CR_Erase && + ValueOut->def == Def))) { LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); SmallVector<SlotIndex,8> EndPoints; @@ -3041,7 +3043,7 @@ // Mark value number as unused. ValueOut->markUnused(); - if (V.Identical && S.Query(OtherDef).valueOut()) { + if (V.Identical && S.Query(OtherDef).valueOutOrDead()) { // If V is identical to V.OtherVNI (and S was live at OtherDef), // then we can't simply prune V from S. V needs to be replaced // with V.OtherVNI. 
Index: test/CodeGen/AMDGPU/coalescer-subranges-prune-kill-copy.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/coalescer-subranges-prune-kill-copy.mir @@ -0,0 +1,286 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=simple-register-coalescing -o - %s | FileCheck -check-prefix=GCN %s + +# Regression test: this input used to crash the register coalescer with the message: +# JoinVals::ConflictResolution (anonymous namespace)::JoinVals::analyzeValue(unsigned int, (anonymous namespace)::JoinVals &): Assertion `DefMI != nullptr' failed + +# GCN: bb.16: +# GCN: S_CMP_LT_I32 %1.sub0, +# GCN-NEXT: vreg_256 = COPY %35 +# GCN-NEXT: S_CBRANCH_SCC1 %bb.23 + +--- +name: test +tracksRegLiveness: true +liveins: + - { reg: '$sgpr0_sgpr1', virtual-reg: '%16' } +body: | + bb.0: + successors: %bb.1(0x40000000), %bb.28(0x40000000) + liveins: $sgpr0_sgpr1 + + %16:sgpr_64 = COPY killed $sgpr0_sgpr1 + S_CBRANCH_SCC1 %bb.28, implicit undef $scc + S_BRANCH %bb.1 + + bb.1: + successors: %bb.28(0x40000000), %bb.2(0x40000000) + + S_CBRANCH_SCC1 %bb.28, implicit undef $scc + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %19:sreg_128 = S_LOAD_DWORDX4_IMM killed %16, 36, 0, 0 + %22:vgpr_32 = V_CVT_F32_U32_e32 %19.sub2, implicit $exec + %23:vgpr_32 = V_RCP_IFLAG_F32_e32 killed %22, implicit $exec + %25:vgpr_32 = V_MUL_F32_e32 1333788672, killed %23, implicit $exec + %26:vgpr_32 = V_CVT_U32_F32_e32 killed %25, implicit $exec + %133:vgpr_32 = V_MUL_HI_U32 %26, %19.sub2, implicit $exec + %134:vgpr_32 = V_MUL_LO_U32 %26, %19.sub2, implicit $exec + %135:vgpr_32 = V_SUB_U32_e32 0, %134, implicit $exec + %33:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %133, implicit $exec + %35:vgpr_32 = V_CNDMASK_B32_e64 0, killed %134, 0, killed %135, %33, implicit $exec + %136:vgpr_32 = V_MUL_HI_U32 killed %35, %26, implicit $exec + %138:vgpr_32 = V_ADD_U32_e32 %26, %136, implicit $exec + %137:vgpr_32 = V_SUB_U32_e32 killed %26, killed %136, implicit $exec + %45:vgpr_32 = V_CNDMASK_B32_e64 
0, killed %137, 0, killed %138, killed %33, implicit $exec + %139:vgpr_32 = V_MUL_HI_U32 killed %45, %19.sub1, implicit $exec + %142:vgpr_32 = V_MUL_LO_U32 %139, %19.sub2, implicit $exec + %143:vgpr_32 = V_SUB_U32_e32 %19.sub1, %142, implicit $exec + %52:sreg_64 = V_CMP_LE_U32_e64 %19.sub2, killed %143, implicit $exec + %54:sreg_64_xexec = V_CMP_GE_U32_e64 %19.sub1, killed %142, implicit $exec + %56:sreg_64_xexec = S_AND_B64 killed %52, %54, implicit-def dead $scc + %141:vgpr_32 = V_ADD_U32_e32 1, %139, implicit $exec + %140:vgpr_32 = V_ADD_U32_e32 -1, %139, implicit $exec + %61:vgpr_32 = V_CNDMASK_B32_e64 0, killed %139, 0, killed %141, killed %56, implicit $exec + %64:vgpr_32 = V_CNDMASK_B32_e64 0, killed %140, 0, killed %61, killed %54, implicit $exec + %145:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + undef %144.sub0:vreg_64 = COPY killed %64 + %144.sub1:vreg_64 = COPY killed %145 + + bb.3: + successors: %bb.5(0x40000000), %bb.4(0x40000000) + + S_CMP_LT_I32 %19.sub0, target-flags(amdgpu-gotprel) 8, implicit-def $scc + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + S_BRANCH %bb.4 + + bb.4: + successors: %bb.28(0x40000000), %bb.6(0x40000000) + + S_CMP_EQ_U32 %19.sub0, target-flags(amdgpu-gotprel) 8, implicit-def $scc + S_CBRANCH_SCC1 %bb.28, implicit killed $scc + S_BRANCH %bb.6 + + bb.5: + successors: %bb.28(0x40000000), %bb.6(0x40000000) + + S_CMP_LG_U32 %19.sub0, target-flags(amdgpu-gotprel) 7, implicit-def $scc + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + S_BRANCH %bb.28 + + bb.6: + successors: %bb.7(0x80000000) + + + bb.7: + successors: %bb.8(0x80000000) + + %72:sreg_64 = V_CMP_NE_U64_e64 0, killed %144, implicit $exec + %109:sreg_32 = S_MOV_B32 0 + %115:sreg_64 = S_AND_B64 $exec, killed %72, implicit-def dead $scc + %170:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + + bb.8: + successors: %bb.9(0x80000000) + + + bb.9: + successors: %bb.15(0x40000000), %bb.10(0x40000000) + + S_CMP_LT_I32 %19.sub0, target-flags(amdgpu-gotprel) 4, implicit-def $scc + 
S_CBRANCH_SCC1 %bb.15, implicit killed $scc + S_BRANCH %bb.10 + + bb.10: + successors: %bb.11(0x40000000), %bb.12(0x40000000) + + S_CMP_LT_I32 %19.sub0, target-flags(amdgpu-gotprel) 5, implicit-def $scc + S_CBRANCH_SCC0 %bb.12, implicit killed $scc + + bb.11: + successors: %bb.21(0x80000000) + + undef %75.sub0:vreg_256 = COPY %170 + %75.sub1:vreg_256 = COPY %170 + %75.sub3:vreg_256 = COPY %170 + %75.sub4:vreg_256 = COPY %170 + %75.sub5:vreg_256 = COPY %170 + %75.sub6:vreg_256 = COPY %170 + %75.sub7:vreg_256 = COPY %170 + %204:vreg_256 = COPY killed %75 + S_BRANCH %bb.21 + + bb.12: + successors: %bb.13(0x40000000), %bb.14(0x40000000) + + S_CMP_LT_I32 %19.sub0, target-flags(amdgpu-gotprel) 6, implicit-def $scc + S_CBRANCH_SCC0 %bb.14, implicit killed $scc + + bb.13: + successors: %bb.20(0x80000000) + + undef %79.sub0:vreg_256 = COPY %170 + %79.sub1:vreg_256 = COPY %170 + %79.sub3:vreg_256 = COPY %170 + %79.sub4:vreg_256 = COPY %170 + %79.sub5:vreg_256 = COPY %170 + %79.sub6:vreg_256 = COPY %170 + %79.sub7:vreg_256 = COPY %170 + %203:vreg_256 = COPY killed %79 + S_BRANCH %bb.20 + + bb.14: + successors: %bb.19(0x40000000), %bb.25(0x40000000) + + S_CMP_EQ_U32 %19.sub0, target-flags(amdgpu-gotprel) 6, implicit-def $scc + S_CBRANCH_SCC1 %bb.19, implicit killed $scc + S_BRANCH %bb.25 + + bb.15: + successors: %bb.17(0x40000000), %bb.16(0x40000000) + + S_CMP_LT_I32 %19.sub0, target-flags(amdgpu-gotprel) 2, implicit-def $scc + S_CBRANCH_SCC1 %bb.17, implicit killed $scc + S_BRANCH %bb.16 + + bb.16: + successors: %bb.23(0x40000000), %bb.22(0x40000000) + + undef %103.sub0:vreg_256 = COPY %170 + %103.sub1:vreg_256 = COPY %170 + %103.sub2:vreg_256 = COPY %170 + %103.sub3:vreg_256 = COPY %170 + %103.sub4:vreg_256 = COPY %170 + %103.sub5:vreg_256 = COPY %170 + %103.sub6:vreg_256 = COPY %170 + %103.sub7:vreg_256 = COPY %170 + S_CMP_LT_I32 %19.sub0, target-flags(amdgpu-gotprel) 3, implicit-def $scc + %205:vreg_256 = COPY %103 + %206:vreg_256 = COPY killed %103 + S_CBRANCH_SCC1 
%bb.23, implicit killed $scc + S_BRANCH %bb.22 + + bb.17: + successors: %bb.18(0x40000000), %bb.25(0x40000000) + + S_CMP_EQ_U32 %19.sub0, target-flags(amdgpu-gotprel) 1, implicit-def $scc + S_CBRANCH_SCC0 %bb.25, implicit killed $scc + + bb.18: + successors: %bb.24(0x80000000) + + undef %108.sub0:vreg_256 = COPY %170 + %108.sub1:vreg_256 = COPY %170 + %108.sub2:vreg_256 = COPY %170 + %108.sub3:vreg_256 = COPY %170 + %108.sub4:vreg_256 = COPY %170 + %108.sub5:vreg_256 = COPY %170 + %108.sub6:vreg_256 = COPY %170 + %108.sub7:vreg_256 = COPY %170 + %207:vreg_256 = COPY killed %108 + S_BRANCH %bb.24 + + bb.19: + successors: %bb.20(0x80000000) + + %84:vgpr_32 = GLOBAL_LOAD_DWORD undef %86:vreg_64, 0, 0, 0, 0, implicit $exec + undef %178.sub5:vreg_256 = COPY killed %84 + %3:vreg_256 = COPY killed %178 + %203:vreg_256 = COPY killed %3 + + bb.20: + successors: %bb.21(0x80000000) + + %4:vreg_256 = COPY killed %203 + %5:vreg_256 = COPY killed %4 + %204:vreg_256 = COPY killed %5 + + bb.21: + successors: %bb.22(0x80000000) + + %6:vreg_256 = COPY killed %204 + %7:vreg_256 = COPY killed %6 + %205:vreg_256 = COPY killed %7 + + bb.22: + successors: %bb.23(0x80000000) + + %8:vreg_256 = COPY killed %205 + %9:vreg_256 = COPY killed %8 + %9.sub2:vreg_256 = COPY %109 + %206:vreg_256 = COPY killed %9 + + bb.23: + successors: %bb.24(0x80000000) + + %10:vreg_256 = COPY killed %206 + %11:vreg_256 = COPY killed %10 + %207:vreg_256 = COPY killed %11 + + bb.24: + successors: %bb.26(0x80000000) + + %12:vreg_256 = COPY killed %207 + %13:vreg_256 = COPY killed %12 + %208:vreg_256 = COPY killed %13 + S_BRANCH %bb.26 + + bb.25: + successors: %bb.26(0x80000000) + + undef %112.sub0:vreg_256 = COPY %170 + %112.sub1:vreg_256 = COPY %170 + %112.sub2:vreg_256 = COPY %170 + %112.sub3:vreg_256 = COPY %170 + %112.sub4:vreg_256 = COPY %170 + %112.sub5:vreg_256 = COPY %170 + %112.sub6:vreg_256 = COPY %170 + %112.sub7:vreg_256 = COPY %170 + %208:vreg_256 = COPY killed %112 + + bb.26: + successors: 
%bb.8(0x7c000000), %bb.27(0x04000000) + + %14:vreg_256 = COPY killed %208 + $vcc = COPY %115 + S_CBRANCH_VCCNZ %bb.8, implicit killed $vcc + S_BRANCH %bb.27 + + bb.27: + successors: %bb.28(0x80000000) + + %187:vgpr_32 = COPY %14.sub7 + %192:vgpr_32 = COPY %14.sub6 + %193:vgpr_32 = COPY %14.sub5 + %194:vgpr_32 = COPY %14.sub4 + undef %188.sub0:vreg_128 = COPY killed %194 + %188.sub1:vreg_128 = COPY killed %193 + %188.sub2:vreg_128 = COPY killed %192 + %188.sub3:vreg_128 = COPY killed %187 + GLOBAL_STORE_DWORDX4 undef %123:vreg_64, killed %188, 0, 0, 0, 0, implicit $exec + %195:vgpr_32 = COPY %14.sub3 + %200:vgpr_32 = COPY %14.sub2 + %201:vgpr_32 = COPY %14.sub1 + %202:vgpr_32 = COPY killed %14.sub0 + undef %196.sub0:vreg_128 = COPY killed %202 + %196.sub1:vreg_128 = COPY killed %201 + %196.sub2:vreg_128 = COPY killed %200 + %196.sub3:vreg_128 = COPY killed %195 + GLOBAL_STORE_DWORDX4 undef %131:vreg_64, killed %196, 0, 0, 0, 0, implicit $exec + + bb.28: + S_ENDPGM 0 + +...