Index: include/llvm/CodeGen/ScheduleDAGInstrs.h
===================================================================
--- include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -374,6 +374,12 @@
     /// Returns a mask for which lanes get read/written by the given (register)
     /// machine operand.
     LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
+
+    /// Returns true if none of the uses currently recorded for the register
+    /// defined by \p MO read a lane that \p MO writes. A dead subregister def
+    /// may therefore coexist with uses of other, non-overlapping subregisters
+    /// of the same virtual register.
+    bool deadDefHasNoUse(const MachineOperand &MO);
   };
 
   /// Creates a new SUnit and return a ptr to it.
Index: lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- lib/CodeGen/ScheduleDAGInstrs.cpp
+++ lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -373,6 +373,28 @@
   return TRI->getSubRegIndexLaneMask(SubReg);
 }
 
+/// Returns true if no use currently recorded for the register defined by
+/// \p MO reads a lane that \p MO writes.
+///
+/// Subregister indices are not compared directly: two distinct indices can
+/// cover overlapping lanes, and a use of the whole register overlaps every
+/// subregister def. All recorded uses are examined, not just the first one.
+bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
+  const LaneBitmask DefLanes = getLaneMaskForMO(MO);
+
+  // FIXME: SparseMultiSet prevents making method const
+  for (auto Use = CurrentVRegUses.find(MO.getReg()),
+            End = CurrentVRegUses.end();
+       Use != End; ++Use) {
+    const MachineOperand &UseMO =
+        Use->SU->getInstr()->getOperand(Use->OperandIndex);
+    if ((getLaneMaskForMO(UseMO) & DefLanes).any())
+      return false;
+  }
+
+  return true;
+}
+
 /// Adds register output and data dependencies from this SUnit to instructions
 /// that occur later in the same scheduling region if they read from or write to
 /// the virtual register defined at OperIdx.
@@ -402,8 +424,7 @@
   }
 
   if (MO.isDead()) {
-    assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
-           "Dead defs should have no uses");
+    assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
   } else {
     // Add data dependence to all uses we found so far.
const TargetSubtargetInfo &ST = MF.getSubtarget(); Index: test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s + +# This would assert that a dead def should have no uses, but the dead +# def and use have different subreg indices. + +--- +name: multi_def_dead_reg_subreg_check +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27' + scratchWaveOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr32' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + privateSegmentWaveByteOffset: { reg: '$sgpr33' } +body: | + ; CHECK-LABEL: name: multi_def_dead_reg_subreg_check + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr6_sgpr7 + ; CHECK: undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_]], implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5) + ; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec + ; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec + ; CHECK: dead %9:vreg_128 = 
DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1 + ; CHECK: INLINEASM &"", 1, 851978, def dead [[COPY1]], 851978, def dead [[COPY]].sub1, 2147483657, [[COPY1]], 2147549193, [[COPY]].sub1 + ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 + ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 + ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec + ; CHECK: %11.sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_]] + ; CHECK: %11.sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_]] + ; CHECK: [[COPY2:%[0-9]+]]:vreg_512 = COPY %11 + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $sgpr6_sgpr7 + + undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %2:vgpr_32 = V_ADD_U32_e32 0, %1, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %4:vreg_512 = COPY %0 + + bb.1: + BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5) + %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec + %8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec + %9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec + %10:vgpr_32 = V_ADD_I32_e32 4, %3, implicit-def dead $vcc, implicit $exec + undef %11.sub0:vreg_512 = COPY %4.sub0 + %12:vgpr_32 = COPY %4.sub0 + %11.sub1:vreg_512 = COPY %4.sub1 + INLINEASM &"", 1, 851978, def dead %12, 851978, def dead %4.sub1, 2147483657, %12, 2147549193, %4.sub1 + %11.sub2:vreg_512 = COPY undef %1 + %11.sub3:vreg_512 = COPY %4.sub3 + %11.sub5:vreg_512 = COPY undef %1 + %4:vreg_512 = COPY %11 + S_BRANCH %bb.1 + +...