Summary of this patch (free-form notes placed ahead of the first "Index:" header)

  llvm/lib/CodeGen/LiveRangeEdit.cpp
    * `Dest` changes type from `unsigned` to `Register`, and a new local
      `DestSubReg` records the subregister index of operand 0 alongside it.
    * In the branch that keeps a trivially rematerializable dead def around
      (the one that inserts MI into DeadRemats), the new live interval still
      gets a single segment [Idx, Idx.getDeadSlot()). When `DestSubReg` is
      nonzero, a subrange is now also created for
      TRI->getSubRegIndexLaneMask(DestSubReg) and given the same dead segment
      with its own value number.
    * The VNInfo allocator is fetched once into `Alloc` and reused for the
      main range and the subrange.

  llvm/test/CodeGen/AMDGPU/remat-vop.mir
    * The RUN line replaces -verify-machineinstrs with -verify-regalloc.
    * Adds test_remat_subreg_def, whose `undef %1.sub0:vreg_64` def exercises
      the subregister path; per the test's own comment, the dead interval
      needs a subrange to avoid a verifier error.

  NOTE(review): `DestSubReg` is declared without an initializer and is only
  assigned inside the `VRM && ...` branch. Its single read is guarded by
  `isOrigDef`; the hunks do not show where `isOrigDef` is set, so confirm it
  can only become true inside that same branch.
  NOTE(review): the context line `SmallVector RegsToErase;` carries no
  template arguments. This may be an artifact of how the patch text was
  extracted; confirm against the tree before applying.

Index: llvm/lib/CodeGen/LiveRangeEdit.cpp
===================================================================
--- llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -302,13 +302,15 @@
   SmallVector RegsToErase;
   bool ReadsPhysRegs = false;
   bool isOrigDef = false;
-  unsigned Dest;
+  Register Dest;
+  unsigned DestSubReg;
   // Only optimize rematerialize case when the instruction has one def, since
   // otherwise we could leave some dead defs in the code. This case is
   // extremely rare.
   if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
       MI->getDesc().getNumDefs() == 1) {
     Dest = MI->getOperand(0).getReg();
+    DestSubReg = MI->getOperand(0).getSubReg();
     unsigned Original = VRM->getOriginal(Dest);
     LiveInterval &OrigLI = LIS.getInterval(Original);
     VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx);
@@ -386,8 +388,18 @@
   if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
       TII.isTriviallyReMaterializable(*MI, AA)) {
     LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
-    VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+    VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+    VNInfo *VNI = NewLI.getNextValue(Idx, Alloc);
     NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
+
+    if (DestSubReg) {
+      const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+      auto *SR = NewLI.createSubRange(
+          Alloc, TRI->getSubRegIndexLaneMask(DestSubReg));
+      SR->addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(),
+                                           SR->getNextValue(Idx, Alloc)));
+    }
+
     pop_back();
     DeadRemats->insert(MI);
     const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
Index: llvm/test/CodeGen/AMDGPU/remat-vop.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs --stress-regalloc=2 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc --stress-regalloc=2 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s
 
 ---
 name: test_remat_v_mov_b32_e32
@@ -3743,3 +3743,28 @@
     S_NOP 0, implicit %3
     S_ENDPGM 0
 ...
+
+# Make sure there's no verifier error after making a subregister def dead. The
+# dead interval still needs a subrange.
+
+---
+name: test_remat_subreg_def
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: test_remat_subreg_def
+    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    undef %1.sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+    S_NOP 0, implicit %0
+    S_NOP 0, implicit %2
+    S_NOP 0, implicit %1
+    S_ENDPGM 0
+...