Index: llvm/lib/CodeGen/SplitKit.cpp
===================================================================
--- llvm/lib/CodeGen/SplitKit.cpp
+++ llvm/lib/CodeGen/SplitKit.cpp
@@ -596,6 +596,7 @@
 
   SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
                                         BestIdx, DestLI, Late, SlotIndex());
+  auto FirstMI = LIS.getInstructionFromIndex(Def)->getIterator();
 
   // Greedy heuristic: Keep iterating keeping the best covering subreg index
   // each time.
@@ -629,6 +630,15 @@
     LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
   }
 
+  // If the copies emitted above form a bundle, finalize it and map the
+  // resulting BUNDLE header in LiveIntervals in place of the first copy.
+  if (FirstMI->isBundledWithSucc()) {
+    finalizeBundle(MBB, FirstMI);
+    auto Bundle = std::prev(FirstMI);
+    assert(Bundle->isBundle() && "expected BUNDLE header before first copy");
+    LIS.ReplaceMachineInstrInMaps(*FirstMI, *Bundle);
+  }
+
   return Def;
 }
 
Index: llvm/lib/CodeGen/VirtRegMap.cpp
===================================================================
--- llvm/lib/CodeGen/VirtRegMap.cpp
+++ llvm/lib/CodeGen/VirtRegMap.cpp
@@ -400,18 +400,27 @@
 /// after processing the last in the bundle. Does not update LiveIntervals
 /// which we shouldn't need for this instruction anymore.
 void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
-  if (!MI.isCopy())
+  if (!MI.isCopy() && !MI.isKill())
     return;
 
   if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
     SmallVector<MachineInstr *, 2> MIs({&MI});
 
-    // Only do this when the complete bundle is made out of COPYs.
+    // Only do this when the complete bundle is made out of COPYs and KILLs.
     MachineBasicBlock &MBB = *MI.getParent();
     for (MachineBasicBlock::reverse_instr_iterator I =
          std::next(MI.getReverseIterator()), E = MBB.instr_rend();
          I != E && I->isBundledWithSucc(); ++I) {
-      if (!I->isCopy())
+      if (I->isBundle()) {
+        // Drop the BUNDLE header from the slot index maps before erasing it:
+        // eraseFromBundle() deletes the instruction, so *I must not be used
+        // afterwards.
+        if (Indexes)
+          Indexes->removeSingleMachineInstrFromMaps(*I);
+        I->eraseFromBundle();
+        break;
+      }
+      if (!I->isCopy() && !I->isKill())
         return;
       MIs.push_back(&*I);
     }
Index: llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir
@@ -0,0 +1,119 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,RA %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=greedy,virtregrewriter,post-RA-sched -o - -verify-machineinstrs %s | FileCheck -check-prefixes=MIR,VR %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=greedy -o - -verify-machineinstrs %s | FileCheck -check-prefix=ASM %s
+
+---
+# MIR-LABEL: name: splitkit_copy_bundle
+
+# RA: BUNDLE implicit-def %4, implicit %5 {
+# RA-NEXT: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %5.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
+# RA-NEXT: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %5.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
+# RA-NEXT: internal %4.sub28_sub29:sgpr_1024 = COPY %5.sub28_sub29
+# RA-NEXT: }
+
+# RA: BUNDLE implicit-def %6, implicit %4 {
+# RA-NEXT: undef %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
+# RA-NEXT: internal %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
+# RA-NEXT: internal %6.sub28_sub29:sgpr_1024 = COPY %4.sub28_sub29
+# RA-NEXT: }
+
+# RA: BUNDLE implicit-def %4, implicit %6 {
+# RA-NEXT: undef %4.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15:sgpr_1024 = COPY %6.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
+# RA-NEXT: internal %4.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27:sgpr_1024 = COPY %6.sub12_sub13_sub14_sub15_sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27
+# RA-NEXT: internal %4.sub28_sub29:sgpr_1024 = COPY %6.sub28_sub29
+# RA-NEXT: }
+
+
+
+# VR-NOT: BUNDLE
+# VR: renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = KILL undef renamable $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+# VR-NEXT: renamable $sgpr96_sgpr97 = KILL undef renamable $sgpr96_sgpr97
+
+# VR-NOT: BUNDLE
+
+# ASM-LABEL: {{^}}splitkit_copy_bundle:
+# ASM: ; implicit-def: $sgpr34_sgpr35
+# ASM-NEXT: ; implicit-def: $sgpr98_sgpr99
+# ASM-NEXT: ; kill: def $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 killed $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
+# ASM-NEXT: ; kill: def $sgpr96_sgpr97 killed $sgpr96_sgpr97
+
+name: splitkit_copy_bundle
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body: |
+  bb.0:
+    %0:sreg_64 = IMPLICIT_DEF
+    %1:sreg_64 = IMPLICIT_DEF
+    undef %2.sub1:sgpr_1024 = S_MOV_B32 -1
+    %2.sub0:sgpr_1024 = S_MOV_B32 -1
+    undef %3.sub0:sgpr_1024 = S_MOV_B32 0
+
+  bb.1:
+    %2.sub2:sgpr_1024 = COPY %2.sub0
+    %2.sub3:sgpr_1024 = COPY %2.sub1
+    %2.sub4:sgpr_1024 = COPY %2.sub0
+    %2.sub5:sgpr_1024 = COPY %2.sub1
+    %2.sub6:sgpr_1024 = COPY %2.sub0
+    %2.sub7:sgpr_1024 = COPY %2.sub1
+    %2.sub8:sgpr_1024 = COPY %2.sub0
+    %2.sub9:sgpr_1024 = COPY %2.sub1
+    %2.sub10:sgpr_1024 = COPY %2.sub0
+    %2.sub11:sgpr_1024 = COPY %2.sub1
+    %2.sub12:sgpr_1024 = COPY %2.sub0
+    %2.sub13:sgpr_1024 = COPY %2.sub1
+    %2.sub14:sgpr_1024 = COPY %2.sub0
+    %2.sub15:sgpr_1024 = COPY %2.sub1
+    %2.sub16:sgpr_1024 = COPY %2.sub0
+    %2.sub17:sgpr_1024 = COPY %2.sub1
+    %2.sub18:sgpr_1024 = COPY %2.sub0
+    %2.sub19:sgpr_1024 = COPY %2.sub1
+    %2.sub20:sgpr_1024 = COPY %2.sub0
+    %2.sub21:sgpr_1024 = COPY %2.sub1
+    %2.sub22:sgpr_1024 = COPY %2.sub0
+    %2.sub23:sgpr_1024 = COPY %2.sub1
+    %2.sub24:sgpr_1024 = COPY %2.sub0
+    %2.sub25:sgpr_1024 = COPY %2.sub1
+    %2.sub26:sgpr_1024 = COPY %2.sub0
+    %2.sub27:sgpr_1024 = COPY %2.sub1
+    %2.sub28:sgpr_1024 = COPY %2.sub0
+    %2.sub29:sgpr_1024 = COPY %2.sub1
+    %3.sub1:sgpr_1024 = COPY %3.sub0
+    %3.sub2:sgpr_1024 = COPY %3.sub0
+    %3.sub3:sgpr_1024 = COPY %3.sub0
+    %3.sub4:sgpr_1024 = COPY %3.sub0
+    %3.sub5:sgpr_1024 = COPY %3.sub0
+    %3.sub6:sgpr_1024 = COPY %3.sub0
+    %3.sub7:sgpr_1024 = COPY %3.sub0
+    %3.sub8:sgpr_1024 = COPY %3.sub0
+    %3.sub9:sgpr_1024 = COPY %3.sub0
+    %3.sub10:sgpr_1024 = COPY %3.sub0
+    %3.sub11:sgpr_1024 = COPY %3.sub0
+    %3.sub12:sgpr_1024 = COPY %3.sub0
+    %3.sub13:sgpr_1024 = COPY %3.sub0
+    %3.sub14:sgpr_1024 = COPY %3.sub0
+    %3.sub15:sgpr_1024 = COPY %3.sub0
+    %3.sub16:sgpr_1024 = COPY %3.sub0
+    %3.sub17:sgpr_1024 = COPY %3.sub0
+    %3.sub18:sgpr_1024 = COPY %3.sub0
+    %3.sub19:sgpr_1024 = COPY %3.sub0
+    %3.sub20:sgpr_1024 = COPY %3.sub0
+    %3.sub21:sgpr_1024 = COPY %3.sub0
+    %3.sub22:sgpr_1024 = COPY %3.sub0
+    %3.sub23:sgpr_1024 = COPY %3.sub0
+    %3.sub24:sgpr_1024 = COPY %3.sub0
+    %3.sub25:sgpr_1024 = COPY %3.sub0
+    %3.sub26:sgpr_1024 = COPY %3.sub0
+    %3.sub27:sgpr_1024 = COPY %3.sub0
+    %3.sub28:sgpr_1024 = COPY %3.sub0
+    %3.sub29:sgpr_1024 = COPY %3.sub0
+    %3.sub30:sgpr_1024 = COPY %3.sub0
+    %3.sub31:sgpr_1024 = COPY %3.sub0
+
+  bb.2:
+    S_NOP 0, implicit %0, implicit %1, csr_amdgpu_highregs
+    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
+    S_BRANCH %bb.2
+
+...