diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -1212,7 +1212,10 @@
         }
         ++I;
       }
-      LIS->extendToIndices(SR, EndPoints);
+      SmallVector<SlotIndex, 8> Undefs;
+      IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
+                                 *LIS->getSlotIndexes());
+      LIS->extendToIndices(SR, EndPoints, Undefs);
     }
     // If any dead defs were extended, truncate them.
     shrinkToUses(&IntB);
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir
@@ -0,0 +1,104 @@
+# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck %s
+#
+# CHECK-LABEL: bb.3:
+# CHECK-NOT: COPY
+# CHECK-LABEL: bb.4:
+#
+# The failure occurs when the coalescer runs removePartialRedundancy() on
+# "%9:vreg_512 = COPY %18" in bb.3. The coalescer prunes and extends each
+# subrange of %9. The offending subrange has a def location (in bb.5) on the
+# predecessor path 4->5->6->9, but on the other predecessor path 0->1->10 its
+# only definition point is an undef in bb.0. findReachingDefs() then searches
+# all the way back through the predecessor path 3->10->1->0, fails to find a
+# reaching def on that path (the undef in bb.0 does not count as one), and
+# reports:
+# "Use of $noreg does not have a corresponding definition on every path
+# LLVM ERROR: Use not jointly dominated by defs"
+
+---
+name: _amdgpu_ps_main
+alignment: 1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: $sgpr2, $sgpr3, $vgpr3
+
+    %0:vgpr_32 = COPY $vgpr3
+    %1:sgpr_32 = COPY $sgpr2
+    %2:sreg_64 = S_GETPC_B64
+    %3:sreg_64 = COPY %2
+    %3.sub0:sreg_64 = COPY %1
+    %4:sgpr_128 = S_LOAD_DWORDX4_IMM %3, 0, 0, 0
+    %5:sgpr_32 = S_BUFFER_LOAD_DWORD_IMM %4, 1, 0, 0 :: (dereferenceable invariant load 4)
+    %6:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
+    undef %7.sub0:vreg_512 = nnan nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e32 %5, %6, implicit $mode, implicit $exec
+    %8:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
+    undef %9.sub0:vreg_512 = COPY %8
+    %9.sub2:vreg_512 = COPY %7.sub0
+    %9.sub3:vreg_512 = COPY %7.sub0
+    %10:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, 1065353216, 0, %0, 0, implicit $mode, implicit $exec
+    %11:sreg_64 = COPY $exec, implicit-def $exec
+    %12:sreg_64 = S_AND_B64 %11, %10, implicit-def dead $scc
+    $exec = S_MOV_B64_term %12
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    %13:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 1065353216, 0, %5, 0, implicit $mode, implicit $exec
+    %14:sreg_64 = S_AND_B64 $exec, %13, implicit-def dead $scc
+    $vcc = COPY %14
+    S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc
+    S_BRANCH %bb.10
+
+  bb.2:
+    successors: %bb.8, %bb.7
+
+    $exec = S_OR_B64 $exec, %11, implicit-def $scc
+    %15:sreg_64 = COPY $exec, implicit-def $exec
+    %16:sreg_64 = S_AND_B64 %15, %10, implicit-def dead $scc
+    $exec = S_MOV_B64_term %16
+    S_CBRANCH_EXECZ %bb.7, implicit $exec
+    S_BRANCH %bb.8
+
+  bb.3:
+    %17:sreg_64 = S_ANDN2_B64 %10, $exec, implicit-def dead $scc
+    %10:sreg_64 = S_OR_B64 %17, $exec, implicit-def dead $scc
+    %9:vreg_512 = COPY %18
+    S_BRANCH %bb.2
+
+  bb.4:
+    %18:vreg_512 = COPY %9
+    S_BRANCH %bb.5
+
+  bb.5:
+    %7.sub2:vreg_512 = COPY %18.sub2
+    %7.sub3:vreg_512 = COPY %18.sub3
+    %9:vreg_512 = COPY %7
+    S_BRANCH %bb.6
+
+  bb.6:
+    successors: %bb.9(0x04000000), %bb.4(0x7c000000)
+
+    $exec = S_ANDN2_B64_term $exec, 0, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.4, implicit $exec
+    S_BRANCH %bb.9
+
+  bb.7:
+    $exec = S_OR_B64 $exec, %15, implicit-def $scc
+    %19:vgpr_32 = COPY %9.sub0
+    EXP_DONE 0, %19, undef %20:vgpr_32, undef %21:vgpr_32, undef %22:vgpr_32, -1, 0, 1, implicit $exec
+    S_ENDPGM 0
+
+  bb.8:
+    %23:vreg_512 = COPY %9
+    %9:vreg_512 = COPY %23
+    S_BRANCH %bb.7
+
+  bb.9:
+    S_BRANCH %bb.3
+
+  bb.10:
+    %18:vreg_512 = COPY %9
+    S_BRANCH %bb.3
+
+...
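
For context, a minimal sketch of the fixed extension step, pulled out into a
free function (the helper name extendPrunedSubRange is mine; LIS, IntB, SR,
EndPoints and MRI are the names already in scope in removePartialRedundancy()).
Per the doc comment on LiveIntervals::extendToIndices(), the points being
extended to must be jointly dominated by the union of the range's existing
defs and the points in Undefs; supplying the subrange's undef locations is
what lets the 0->1->10 path above, whose only "definition" of the lane is the
undef in bb.0, pass that check:

    // Illustrative sketch only, not part of the patch; mirrors the hunk above.
    #include "llvm/CodeGen/LiveInterval.h"
    #include "llvm/CodeGen/LiveIntervals.h"
    using namespace llvm;

    static void extendPrunedSubRange(LiveIntervals &LIS, LiveInterval &IntB,
                                     LiveInterval::SubRange &SR,
                                     ArrayRef<SlotIndex> EndPoints,
                                     const MachineRegisterInfo &MRI) {
      // Collect every point where SR's lanes are made undef by
      // <def,read-undef> subregister definitions of IntB's vreg.
      SmallVector<SlotIndex, 8> Undefs;
      IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, MRI,
                                 *LIS.getSlotIndexes());
      // Extend SR to EndPoints; the undef points count toward joint
      // dominance, so a path reaching only an undef no longer errors out.
      LIS.extendToIndices(SR, EndPoints, Undefs);
    }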