diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -2686,14 +2686,8 @@ return CR_Replace; // Check for simple erasable conflicts. - if (DefMI->isImplicitDef()) { - // We need the def for the subregister if there is nothing else live at the - // subrange at this point. - if (TrackSubRegLiveness - && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none()) - return CR_Replace; + if (DefMI->isImplicitDef()) return CR_Erase; - } // Include the non-conflict where DefMI is a coalescable copy that kills // OtherVNI. We still want the copy erased and value numbers merged. diff --git a/llvm/test/CodeGen/AMDGPU/coalescing_makes_lanes_undef.mir b/llvm/test/CodeGen/AMDGPU/coalescing_makes_lanes_undef.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescing_makes_lanes_undef.mir @@ -0,0 +1,49 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck %s + +# Register coalescer is going to eliminate %2:sgpr_32 = COPY %1.sub0 from bb.1 +# by joining %2 and %1.sub0 into %0.sub0 register. Check that when this happens +# the implicit initialization of %0.sub0 in bb.2 has the undef flag +# for the MIR to be valid. 
+ +--- +name: coalescing_makes_lane_undefined +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: coalescing_makes_lane_undefined + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: S_CBRANCH_SCC0 %bb.2, implicit undef $scc + ; CHECK: bb.1: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: undef %0.sub0:sgpr_64 = S_MOV_B32 1 + ; CHECK: %0.sub1:sgpr_64 = S_MOV_B32 2 + ; CHECK: S_BRANCH %bb.3 + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: undef %0.sub0:sgpr_64 = IMPLICIT_DEF + ; CHECK: bb.3: + ; CHECK: S_NOP 0, implicit %0.sub0 + ; CHECK: S_NOP 0, implicit %0 + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_SCC0 %bb.2, implicit undef $scc + + bb.1: + successors: %bb.3 + undef %1.sub0:sgpr_64 = S_MOV_B32 1 + %1.sub1:sgpr_64 = S_MOV_B32 2 + %2:sgpr_32 = COPY %1.sub0 ; copy to be joined + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3 + %2:sgpr_32 = IMPLICIT_DEF + undef %1.sub0:sgpr_64 = IMPLICIT_DEF + %1.sub1:sgpr_64 = IMPLICIT_DEF + + bb.3: + S_NOP 0, implicit killed %2 + S_NOP 0, implicit killed %1 + +...