Index: lib/CodeGen/LiveIntervals.cpp =================================================================== --- lib/CodeGen/LiveIntervals.cpp +++ lib/CodeGen/LiveIntervals.cpp @@ -1288,6 +1288,20 @@ const SlotIndex SplitPos = NewIdxDef; OldIdxVNI = OldIdxIn->valno; + SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end; + LiveRange::iterator Prev = std::prev(OldIdxIn); + if (OldIdxIn != LR.begin() && + SlotIndex::isEarlierInstr(NewIdx, Prev->end)) { + // If the segment before OldIdx read a value defined earlier than + // NewIdx, the moved instruction also reads and forwards that + // value. Extend the lifetime of the new def point. + + // Extend to where the previous range started, unless there is + // another redef first. + NewDefEndPoint = std::min(OldIdxIn->start, + std::next(NewIdxOut)->start); + } + // Merge the OldIdxIn and OldIdxOut segments into OldIdxOut. OldIdxOut->valno->def = OldIdxIn->start; *OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end, @@ -1305,7 +1319,8 @@ // There is no gap between NewSegment and the previous segment. *NewSegment = LiveRange::Segment(Next->start, SplitPos, Next->valno); - *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI); + + *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI); Next->valno->def = SplitPos; } else { // There is a gap between NewSegment and the previous segment. 
Index: test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -0,0 +1,134 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -verify-misched -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- +name: handleMoveUp_incorrect_interval +tracksRegLiveness: true +liveins: + - { reg: '$sgpr4_sgpr5', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + scratchWaveOffsetReg: '$sgpr101' + frameOffsetReg: '$sgpr101' + stackPtrOffsetReg: '$sgpr101' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } +body: | + ; CHECK-LABEL: name: handleMoveUp_incorrect_interval + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $sgpr4_sgpr5 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr4_sgpr5 + ; CHECK: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + ; CHECK: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sgpr_64 = S_LOAD_DWORDX2_IMM [[COPY]](p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5329 + ; CHECK: undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + ; CHECK: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK: 
[[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: INLINEASM &"", 1, 851978, def dead %11 + ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) + ; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %15, 851978, def %16 + ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec + ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec + ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec + ; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %21, 851978, def %22 + ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec + ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] + ; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[DEF2]], implicit $exec + ; CHECK: [[V_CMP_GT_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_GT_U32_e64 64, [[V_ADD_U32_e32_]], implicit $exec + ; CHECK: [[DEF]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] + ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; CHECK: INLINEASM &"", 1, 851978, def dead [[V_MOV_B32_e32_2]], 851978, def dead [[V_MOV_B32_e32_3]], 851977, [[DS_READ_B64_gfx9_]].sub0, 2147483657, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193, [[V_MOV_B32_e32_3]](tied-def 5), 851977, %15, 851977, %16, 851977, [[DS_READ_B32_gfx9_1]], 851977, [[DS_READ_B32_gfx9_]], 851977, [[DS_READ_B32_gfx9_3]], 851977, [[DS_READ_B32_gfx9_2]] + ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, 
addrspace 3) + ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) + ; CHECK: DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + ; CHECK: [[V_MUL_LO_U32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_ADD_U32_e32_]], [[S_MOV_B32_]], implicit $exec + ; CHECK: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, [[V_ADD_U32_e32_]], [[V_CMP_GT_U32_e64_]], implicit $exec + ; CHECK: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_]], [[DEF1]], implicit $exec + ; CHECK: [[V_MUL_LO_U32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32 [[V_CNDMASK_B32_e64_]], [[S_MOV_B32_]], implicit $exec + ; CHECK: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_SUB_U32_e32_]], [[DEF]].sub0, implicit $exec + ; CHECK: [[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 [[V_MUL_LO_U32_1]], [[V_MUL_LO_U32_]], implicit $exec + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK: [[DEF]].sub0:vreg_64 = V_ADD_U32_e32 [[V_SUB_U32_e32_1]], [[V_ADD_U32_e32_1]], implicit $exec + ; CHECK: undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, [[DEF]].sub0, 0, implicit $exec + ; CHECK: undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 [[COPY1]], [[DEF]].sub1, %39, 0, implicit $exec + ; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec + ; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec + ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1) + ; CHECK: INLINEASM &"", 1 + ; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) + ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, implicit 
$exec :: (store 4, addrspace 1) + ; CHECK: %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec + ; CHECK: DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3) + ; CHECK: S_BRANCH %bb.1 + bb.0: + liveins: $sgpr4_sgpr5 + + %0:sgpr_64(p4) = COPY $sgpr4_sgpr5 + %1:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %2:vgpr_32, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr101, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5) + %3:sgpr_64 = S_LOAD_DWORDX2_IMM %0(p4), 0, 0, 0 :: (dereferenceable invariant load 8, align 16, addrspace 4) + %4:sreg_32_xm0 = S_MOV_B32 5329 + undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %7:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec + %8:vreg_64 = IMPLICIT_DEF + %9:vgpr_32 = IMPLICIT_DEF + %10:vgpr_32 = IMPLICIT_DEF + + bb.1: + INLINEASM &"", 1, 851978, def %11:vgpr_32 + GLOBAL_STORE_DWORD undef %12:vreg_64, %1, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + %13:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) + INLINEASM &"def $0 $1", 1, 851978, def %15:vgpr_32, 851978, def %16:vgpr_32 + %17:vgpr_32 = DS_READ_B32_gfx9 %6, 0, 0, implicit $exec + %18:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec + %19:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec + INLINEASM &"def $0 $1", 1, 851978, def %21:vgpr_32, 851978, def %22:vgpr_32 + %23:vgpr_32 = DS_READ_B32_gfx9 %7, 0, 0, implicit $exec + %24:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %5.sub1:vreg_64 = COPY %6 + %25:vgpr_32 = V_ADD_U32_e32 1, %10, implicit $exec + %26:sreg_64_xexec = V_CMP_GT_U32_e64 64, %25, implicit $exec + %27:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + INLINEASM &"", 1, 851978, def dead %24, 851978, def dead %27, 851977, %13.sub0, 2147483657, %24(tied-def 3), 2147549193, %27(tied-def 5), 851977, %15, 851977, %16, 851977, %18, 851977, %17, 851977, %23, 851977, %19 + DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, 
implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) + DS_WRITE_B64_gfx9 undef %30:vgpr_32, %5, 0, 0, implicit $exec :: (store 8, addrspace 3) + undef %31.sub1:vreg_64 = FLAT_LOAD_DWORD undef %32:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4) + %33:vgpr_32 = V_MUL_LO_U32 %25, %4, implicit $exec + %10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %25, %26, implicit $exec + %34:vgpr_32 = V_SUB_U32_e32 %33, %9, implicit $exec + %9:vgpr_32 = V_MUL_LO_U32 %10, %4, implicit $exec + %35:vgpr_32 = V_ADD_U32_e32 %34, %8.sub0, implicit $exec + %36:vgpr_32 = V_SUB_U32_e32 %9, %33, implicit $exec + %37:vgpr_32 = COPY %3.sub1 + undef %8.sub0:vreg_64 = V_ADD_U32_e32 %36, %35, implicit $exec + %8.sub1:vreg_64 = COPY %6 + undef %38.sub0:vreg_64, %39:sreg_64_xexec = V_ADD_I32_e64 %3.sub0, %8.sub0, 0, implicit $exec + undef %40.sub1:vreg_64, dead %41:sreg_64_xexec = V_ADDC_U32_e64 %37, %8.sub1, %39, 0, implicit $exec + undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec + %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec + %43:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1) + INLINEASM &"", 1 + %44:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) + GLOBAL_STORE_DWORD undef %46:vreg_64, %44, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) + %31.sub0:vreg_64 = COPY %43, implicit $exec + DS_WRITE_B64_gfx9 undef %47:vgpr_32, %31, 0, 0, implicit $exec :: (store 8, addrspace 3) + S_BRANCH %bb.1 + +... 
Index: unittests/MI/LiveIntervalTest.cpp =================================================================== --- unittests/MI/LiveIntervalTest.cpp +++ unittests/MI/LiveIntervalTest.cpp @@ -421,6 +421,46 @@ }); } +TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDef) { + liveIntervalTest(R"MIR( + %1:vreg_64 = IMPLICIT_DEF + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec + undef %1.sub0:vreg_64 = V_ADD_U32_e32 %2, %2, implicit $exec + %1.sub1:vreg_64 = COPY %2 + S_NOP 0, implicit %1.sub1 + S_BRANCH %bb.1 + +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + MachineInstr &UndefSubregDef = getMI(MF, 2, 1); + // The scheduler clears undef from subregister defs before moving + UndefSubregDef.getOperand(0).setIsUndef(false); + testHandleMove(MF, LIS, 3, 1, 1); + }); +} + +TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDefMulti) { + liveIntervalTest(R"MIR( + %1:vreg_96 = IMPLICIT_DEF + + bb.1: + %2:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + %3:vgpr_32 = V_ADD_U32_e32 %2, %1.sub0, implicit $exec + undef %1.sub0:vreg_96 = V_ADD_U32_e32 %2, %2, implicit $exec + %1.sub1:vreg_96 = COPY %2 + %1.sub2:vreg_96 = COPY %2 + S_NOP 0, implicit %1.sub1, implicit %1.sub2 + S_BRANCH %bb.1 + +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + MachineInstr &UndefSubregDef = getMI(MF, 2, 1); + // The scheduler clears undef from subregister defs before moving + UndefSubregDef.getOperand(0).setIsUndef(false); + testHandleMove(MF, LIS, 4, 1, 1); + }); +} int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); initLLVM();