diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1213,7 +1213,10 @@ } ++I; } - LIS->extendToIndices(SR, EndPoints); + SmallVector Undefs; + IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI, + *LIS->getSlotIndexes()); + LIS->extendToIndices(SR, EndPoints, Undefs); } // If any dead defs were extended, truncate them. shrinkToUses(&IntB); diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-removepartial-extend-undef-subrange.mir @@ -0,0 +1,74 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck %s +# +# The failure occurs when the coalescer tries to removePartialRedundancy() on the +# "%2:vreg_64 = COPY %3" in bb.1. The coalescer tries to prune and extend each +# subrange of %2, the subrange for %2.sub1 has a def location (in bb.2) in the +# predecessor path 2->3->1. But for another predecessor path 0->4->1, +# the subrange has only one undef location in bb.0. 
If we don't compute the Undef set, +# it will fail to find the reaching def for %2.sub1 in predecessor bb.4 and bb.0 +# and crash with the error message: +# "Use of $noreg does not have a corresponding definition on every path +# LLVM ERROR: Use not jointly dominated by defs" + +--- +name: _amdgpu_ps_main +alignment: 1 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: _amdgpu_ps_main + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: liveins: $sgpr2, $sgpr3, $vgpr3 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr2 + ; CHECK: undef %1.sub0:vreg_64 = COPY [[COPY]] + ; CHECK: undef %2.sub0:vreg_64 = COPY [[COPY]] + ; CHECK: S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc + ; CHECK: S_BRANCH %bb.4 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: S_NOP 0, implicit %2.sub0 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x04000000), %bb.2(0x7c000000) + ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY %2 + ; CHECK: %1.sub0:vreg_64 = COPY [[COPY1]].sub0 + ; CHECK: %2:vreg_64 = COPY %1 + ; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit undef $exec + ; CHECK: S_BRANCH %bb.3 + ; CHECK: bb.3: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: %2:vreg_64 = COPY [[COPY1]] + ; CHECK: S_BRANCH %bb.1 + ; CHECK: bb.4: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $sgpr2, $sgpr3, $vgpr3 + + %0:sgpr_32 = COPY $sgpr2 + undef %1.sub0:vreg_64 = COPY %0 + undef %2.sub0:vreg_64 = COPY %0 + S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc + S_BRANCH %bb.4 + + bb.1: + %2:vreg_64 = COPY %3 + S_NOP 0, implicit %2.sub0 + + bb.2: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %3:vreg_64 = COPY %2 + %1.sub0:vreg_64 = COPY %3.sub0 + %2:vreg_64 = COPY %1 + S_CBRANCH_EXECNZ %bb.2, implicit undef $exec + S_BRANCH %bb.3 + + bb.3: + S_BRANCH %bb.1 + + bb.4: + %3:vreg_64 = COPY %2 + S_BRANCH %bb.1 + +...