Index: lib/CodeGen/RegisterCoalescer.cpp =================================================================== --- lib/CodeGen/RegisterCoalescer.cpp +++ lib/CodeGen/RegisterCoalescer.cpp @@ -2613,6 +2613,12 @@ return CR_Erase; } + // The remaining checks apply to the lanes, which aren't tracked here. This + // was already decided to be OK via the following CR_Replace condition, so + // return CR_Replace directly. + if (SubRangeJoin) + return CR_Replace; + // If the lanes written by this instruction were all undef in OtherVNI, it is // still safe to join the live ranges. This can't be done with a simple value // mapping, though - OtherVNI will map to multiple values: Index: test/CodeGen/AMDGPU/regcoalesce-cannot-join-failures.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/regcoalesce-cannot-join-failures.mir @@ -0,0 +1,118 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-coalescing -run-pass=simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: couldnt_join_subrange_implicit_def_pred_block +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: couldnt_join_subrange_implicit_def_pred_block + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub0:sreg_64_xexec = IMPLICIT_DEF + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: %0.sub1:sreg_64_xexec = COPY %0.sub0 + ; CHECK: S_BRANCH %bb.2 + ; CHECK: bb.2: + ; CHECK: S_ENDPGM implicit %0 + bb.0: + successors: %bb.1 + + undef %0.sub0:sreg_64_xexec = IMPLICIT_DEF + + bb.1: + successors: %bb.2 + + %1:sreg_64 = COPY %0:sreg_64_xexec + %0.sub1:sreg_64_xexec = COPY %0.sub0:sreg_64_xexec + S_BRANCH %bb.2 + + bb.2: + dead %2:sreg_32_xm0 = COPY %0.sub0:sreg_64_xexec + S_ENDPGM implicit killed %1 + +... 
+--- +name: couldnt_join_subrange_no_implicit_def_inst +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: couldnt_join_subrange_no_implicit_def_inst + ; CHECK: undef %0.sub0:sreg_64 = S_MOV_B32 0 + ; CHECK: %0.sub1:sreg_64 = COPY %0.sub0 + ; CHECK: S_ENDPGM implicit %0.sub1 + undef %0.sub0:sreg_64 = S_MOV_B32 0 + %1:sreg_64 = COPY %0:sreg_64 + %0.sub1:sreg_64 = COPY %0.sub0:sreg_64 + S_ENDPGM implicit %1.sub1:sreg_64 + +... +--- +name: couldnt_join_subrange0 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: couldnt_join_subrange0 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub1:sreg_64 = S_MOV_B32 -1 + ; CHECK: bb.1: + ; CHECK: %0.sub0:sreg_64 = S_MOV_B32 0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY %0 + ; CHECK: dead %0.sub1:sreg_64 = COPY %0.sub0 + ; CHECK: S_ENDPGM implicit [[COPY]].sub1 + bb.0: + successors: %bb.1 + undef %0.sub1:sreg_64 = S_MOV_B32 -1 + + bb.1: + %0.sub0:sreg_64 = S_MOV_B32 0 + %1:sreg_64 = COPY %0:sreg_64 + dead %0.sub1:sreg_64 = COPY %0.sub0:sreg_64 + S_ENDPGM implicit %1.sub1:sreg_64 + +... +--- +name: lanes_not_tracked_subreg_join_couldnt_join_subrange +tracksRegLiveness: true +body: | + bb.0: + + ; CHECK-LABEL: name: lanes_not_tracked_subreg_join_couldnt_join_subrange + ; CHECK: undef %0.sub0:sreg_64_xexec = S_MOV_B32 0 + ; CHECK: %0.sub1:sreg_64_xexec = S_MOV_B32 0 + ; CHECK: S_NOP 0, implicit %0.sub1 + ; CHECK: S_NOP 0, implicit %0 + ; CHECK: S_ENDPGM + undef %0.sub0:sreg_64_xexec = S_MOV_B32 0 + %1:sreg_64 = COPY %0 + %0.sub1:sreg_64_xexec = S_MOV_B32 0 + S_NOP 0, implicit %0.sub1 + S_NOP 0, implicit %1 + S_ENDPGM + +... 
+--- +name: couldnt_join_subrange1 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: couldnt_join_subrange1 + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: undef %0.sub0:sreg_64_xexec = S_MOV_B32 0 + ; CHECK: %0.sub1:sreg_64_xexec = COPY %0.sub0 + ; CHECK: bb.1: + ; CHECK: S_NOP 0, implicit %0.sub1 + ; CHECK: S_ENDPGM implicit %0 + bb.0: + successors: %bb.1 + + undef %0.sub0:sreg_64_xexec = S_MOV_B32 0 + %1:sreg_64 = COPY %0 + %0.sub1:sreg_64_xexec = COPY %0.sub0 + + bb.1: + + S_NOP 0, implicit %0.sub1 + S_ENDPGM implicit %1 + +...