diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1212,7 +1212,10 @@ } ++I; } - LIS->extendToIndices(SR, EndPoints); + SmallVector<SlotIndex, 8> Undefs; + IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI, + *LIS->getSlotIndexes()); + LIS->extendToIndices(SR, EndPoints, Undefs); } // If any dead defs were extended, truncate them. shrinkToUses(&IntB); diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir @@ -0,0 +1,256 @@ +# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck %s +# +# CHECK-LABEL: bb.4: +# CHECK-NOT: COPY +# CHECK-LABEL: bb.5: +# +# The failure occurs when the coalescer tries to removePartialRedundancy() on the +# "%10:vreg_512 = COPY %28" in bb.4. The coalescer tries to prune and extend each +# subrange of %10; the offending subrange has a def location in the predecessor +# path going through bb.26 into bb.4. But for another predecessor path 0->1->27->4, +# the subrange has only one undef location in bb.0. 
Then findReachingDef() +# searches all the way back through the predecessor path 4->27->1->0, fails +# to find a reaching def (the undef in bb.0) on that path, and reports the error: +# "Use of $noreg does not have a corresponding definition on every path +# LLVM ERROR: Use not jointly dominated by defs" + +--- +name: _amdgpu_ps_main +alignment: 1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: $sgpr2, $sgpr3, $vgpr3 + + %0:vgpr_32 = COPY $vgpr3 + %1:sgpr_32 = COPY $sgpr3 + %2:sgpr_32 = COPY $sgpr2 + %3:sreg_64 = S_GETPC_B64 + %4:sreg_64 = COPY %3 + %4.sub0:sreg_64 = COPY %2 + %5:sgpr_128 = S_LOAD_DWORDX4_IMM %4, 0, 0, 0 + %6:sgpr_32 = S_BUFFER_LOAD_DWORD_IMM %5, 1, 0, 0 :: (dereferenceable invariant load 4) + %7:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec + undef %8.sub0:vreg_512 = nnan nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e32 %6, %7, implicit $mode, implicit $exec + %9:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + undef %10.sub0:vreg_512 = COPY %9 + %10.sub2:vreg_512 = COPY %8.sub0 + %10.sub3:vreg_512 = COPY %8.sub0 + %11:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, 1065353216, 0, %0, 0, implicit $mode, implicit $exec + %12:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 1065353216, 0, %0, 0, implicit $mode, implicit $exec + %13:sreg_64_xexec = nofpexcept V_CMP_LT_F32_e64 0, 1065353216, 0, %1, 0, implicit $mode, implicit $exec + %14:sreg_64 = nofpexcept V_CMP_GT_F32_e64 0, 0, 0, %0, 0, implicit $mode, implicit $exec + %15:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 0, 0, %0, 0, implicit $mode, implicit $exec + %16:sreg_64 = COPY $exec, implicit-def $exec + %17:sreg_64 = S_AND_B64 %16, %12, implicit-def dead $scc + $exec = S_MOV_B64_term %17 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.1 + + bb.1: + %18:sreg_64 = nofpexcept V_CMP_NGT_F32_e64 0, 1065353216, 0, %6, 0, implicit $mode, implicit $exec + %19:sreg_64 = S_MOV_B64 -1 + %20:sreg_64 = S_AND_B64 $exec, %18, implicit-def dead $scc + 
$vcc = COPY %20 + S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc + S_BRANCH %bb.27 + + bb.2: + successors: %bb.13, %bb.17 + + $exec = S_OR_B64 $exec, %16, implicit-def $scc + %21:sreg_64 = COPY $exec, implicit-def $exec + %22:sreg_64 = S_AND_B64 %21, %11, implicit-def dead $scc + $exec = S_MOV_B64_term %22 + S_CBRANCH_EXECZ %bb.17, implicit $exec + S_BRANCH %bb.13 + + bb.3: + %23:sreg_64 = S_MOV_B64 0 + %19:sreg_64 = IMPLICIT_DEF + %24:sreg_64 = IMPLICIT_DEF + %25:sreg_64 = COPY %23 + S_BRANCH %bb.5 + + bb.4: + %26:sreg_64 = S_ANDN2_B64 %11, $exec, implicit-def dead $scc + %27:sreg_64 = S_AND_B64 %19, $exec, implicit-def dead $scc + %11:sreg_64 = S_OR_B64 %26, %27, implicit-def dead $scc + %10:vreg_512 = COPY %28 + S_BRANCH %bb.2 + + bb.5: + successors: %bb.6, %bb.7 + + %28:vreg_512 = COPY %10 + %29:sreg_64 = S_MOV_B64 -1 + %30:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %13, implicit $exec + V_CMP_NE_U32_e32 1, %30, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc + %31:sreg_64 = S_MOV_B64 -1 + %10:vreg_512 = COPY %28 + S_CBRANCH_VCCNZ %bb.7, implicit killed $vcc + S_BRANCH %bb.6 + + bb.6: + successors: %bb.8, %bb.9 + + %31:sreg_64 = S_MOV_B64 0 + %10:vreg_512 = COPY %28 + %32:sreg_64 = COPY $exec, implicit-def $exec + %33:sreg_64 = S_AND_B64 %32, %15, implicit-def dead $scc + $exec = S_MOV_B64_term %33 + S_CBRANCH_EXECZ %bb.9, implicit $exec + S_BRANCH %bb.8 + + bb.7: + successors: %bb.10, %bb.11 + + %24:sreg_64 = S_OR_B64 %24, $exec, implicit-def dead $scc + %34:sreg_64 = COPY $exec, implicit-def $exec + %35:sreg_64 = S_AND_B64 %34, %31, implicit-def dead $scc + $exec = S_MOV_B64_term %35 + S_CBRANCH_EXECZ %bb.11, implicit $exec + S_BRANCH %bb.10 + + bb.8: + %8.sub2:vreg_512 = COPY %28.sub2 + %8.sub3:vreg_512 = COPY %28.sub3 + %31:sreg_64 = COPY $exec + %10:vreg_512 = COPY %8 + + bb.9: + $exec = S_OR_B64 $exec, %32, implicit-def $scc + S_BRANCH %bb.7 + + bb.10: + successors: %bb.12, %bb.25 + + %36:sreg_64 = S_MOV_B64 -1 + 
%37:sreg_64 = COPY $exec, implicit-def $exec + %38:sreg_64 = S_AND_B64 %37, %15, implicit-def dead $scc + $exec = S_MOV_B64_term %38 + S_CBRANCH_EXECZ %bb.25, implicit $exec + S_BRANCH %bb.12 + + bb.11: + successors: %bb.26(0x04000000), %bb.5(0x7c000000) + + $exec = S_OR_B64 $exec, %34, implicit-def $scc + %39:sreg_64 = S_AND_B64 $exec, %29, implicit-def $scc + %25:sreg_64 = S_OR_B64 %39, %25, implicit-def $scc + %40:sreg_64 = S_ANDN2_B64 %19, $exec, implicit-def dead $scc + %41:sreg_64 = S_AND_B64 %24, $exec, implicit-def dead $scc + %19:sreg_64 = S_OR_B64 %40, %41, implicit-def dead $scc + $exec = S_ANDN2_B64_term $exec, %25, implicit-def $scc + S_CBRANCH_EXECNZ %bb.5, implicit $exec + S_BRANCH %bb.26 + + bb.12: + %36:sreg_64 = S_XOR_B64 $exec, -1, implicit-def dead $scc + S_BRANCH %bb.25 + + bb.13: + successors: %bb.14, %bb.18 + + %42:sreg_64 = S_MOV_B64 -1 + %43:sreg_64 = COPY %42 + %44:sreg_64 = COPY $exec, implicit-def $exec + %45:sreg_64 = S_AND_B64 %44, %12, implicit-def dead $scc + $exec = S_MOV_B64_term %45 + S_CBRANCH_EXECZ %bb.18, implicit $exec + S_BRANCH %bb.14 + + bb.14: + successors: %bb.19, %bb.20 + + %46:sreg_64 = S_MOV_B64 0 + %47:sreg_64 = COPY %46 + %48:sreg_64 = COPY $exec, implicit-def $exec + %49:sreg_64 = S_AND_B64 %48, %15, implicit-def dead $scc + $exec = S_MOV_B64_term %49 + S_CBRANCH_EXECZ %bb.20, implicit $exec + S_BRANCH %bb.19 + + bb.15: + S_BRANCH %bb.23 + + bb.16: + + bb.17: + $exec = S_OR_B64 $exec, %21, implicit-def $scc + %50:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + EXP_DONE 0, %50, undef %51:vgpr_32, undef %52:vgpr_32, undef %53:vgpr_32, -1, 0, 1, implicit $exec + S_ENDPGM 0 + + bb.18: + successors: %bb.21, %bb.16 + + $exec = S_OR_B64 $exec, %44, implicit-def $scc + %54:sreg_64 = COPY $exec, implicit-def $exec + %55:sreg_64 = S_AND_B64 %54, %43, implicit-def dead $scc + $exec = S_MOV_B64_term %55 + S_CBRANCH_EXECZ %bb.16, implicit $exec + S_BRANCH %bb.21 + + bb.19: + undef %56.sub0:vreg_512 = COPY %10.sub2 + 
%56.sub3:vreg_512 = COPY %10.sub3 + %57:vreg_512 = COPY %56 + %47:sreg_64 = COPY $exec + %10:vreg_512 = COPY %57 + + bb.20: + $exec = S_OR_B64 $exec, %48, implicit-def $scc + %58:vreg_512 = COPY %10 + %59:sreg_64 = COPY %47 + %60:sreg_64 = S_ORN2_B64 %59, $exec, implicit-def dead $scc + %43:sreg_64 = COPY %60 + %10:vreg_512 = COPY %58 + S_BRANCH %bb.18 + + bb.21: + successors: %bb.22, %bb.23 + + %61:sreg_64 = COPY $exec, implicit-def $exec + %62:sreg_64 = S_AND_B64 %61, %12, implicit-def dead $scc + $exec = S_MOV_B64_term %62 + S_CBRANCH_EXECZ %bb.23, implicit $exec + S_BRANCH %bb.22 + + bb.22: + %63:vgpr_32 = COPY %10.sub0 + %64:sreg_64 = S_MOV_B64 0 + %65:sreg_64 = COPY %64 + %66:vgpr_32 = COPY %63 + S_BRANCH %bb.24 + + bb.23: + S_BRANCH %bb.16 + + bb.24: + successors: %bb.15(0x04000000), %bb.24(0x7c000000) + + %67:sreg_64 = S_AND_B64 $exec, %14, implicit-def $scc + %65:sreg_64 = S_OR_B64 %67, %65, implicit-def $scc + %66:vgpr_32 = V_CNDMASK_B32_e64 0, %66, 0, %10.sub3, %13, implicit $exec + $exec = S_ANDN2_B64_term $exec, %65, implicit-def $scc + S_CBRANCH_EXECNZ %bb.24, implicit $exec + S_BRANCH %bb.15 + + bb.25: + $exec = S_OR_B64 $exec, %37, implicit-def $scc + %24:sreg_64 = S_ANDN2_B64 %24, $exec, implicit-def dead $scc + %29:sreg_64 = S_ORN2_B64 %36, $exec, implicit-def dead $scc + S_BRANCH %bb.11 + + bb.26: + $exec = S_OR_B64 $exec, %25, implicit-def $scc + S_BRANCH %bb.4 + + bb.27: + %28:vreg_512 = COPY %10 + S_BRANCH %bb.4 + +...