Index: lib/CodeGen/MIRCanonicalizerPass.cpp
===================================================================
--- lib/CodeGen/MIRCanonicalizerPass.cpp
+++ lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -189,6 +189,8 @@
     Instructions.push_back(&MI);
   }
 
+  // Defs spliced in front of each use, keyed by that use; sorted per use below.
+  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
   std::vector<MachineInstr *> PseudoIdempotentInstructions;
   std::vector<unsigned> PhysRegDefs;
   for (auto *II : Instructions) {
@@ -297,10 +299,27 @@
       UseI->dump();
     });
 
+    MultiUsers[UseToBringDefCloserTo].push_back(Def);
     Changed = true;
     MBB->splice(UseI, MBB, DefI);
   }
 
+  // Sort the defs for users of multiple defs lexicographically. Each use's
+  // iterator is passed to rescheduleLexographically as its position callback.
+  for (const auto &E : MultiUsers) {
+    // Find the use in MBB; skip this group if it is not found.
+    auto UseI =
+        std::find_if(MBB->instr_begin(), MBB->instr_end(),
+                     [&](MachineInstr &MI) -> bool { return &MI == E.first; });
+
+    if (UseI == MBB->instr_end())
+      continue;
+
+    DEBUG(dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
+    Changed |= rescheduleLexographically(
+        E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; });
+  }
+
   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
   DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
   Changed |= rescheduleLexographically(
Index: test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/AMDGPU/mir-canon-multi.mir
@@ -0,0 +1,34 @@
+# RUN: llc -o - -march=amdgcn -run-pass mir-canonicalizer -x mir %s | FileCheck %s
+
+# The four COPY defs are expected ahead of the two REG_SEQUENCE instructions
+# that use them, in the order listed below.
+# CHECK: %1363:vgpr_32 = COPY %4354
+# CHECK: %1368:vgpr_32 = COPY %4355
+# CHECK: %1369:vgpr_32 = COPY %1372
+# CHECK: %1370:vgpr_32 = COPY %1373
+# CHECK: REG_SEQUENCE %1368, %subreg.sub0, %1363, %subreg.sub1
+# CHECK: REG_SEQUENCE %1368, %subreg.sub0, %1363, %subreg.sub1, %1369, %subreg.sub2, %1370, %subreg.sub3
+
+...
+---
+name: foo
+body:             |
+  bb.0:
+    %10:sreg_32_xm0 = S_MOV_B32 61440
+    %11:sreg_32_xm0 = S_MOV_B32 0
+    %3:vgpr_32 = COPY $vgpr0
+
+    %vreg123_0:vgpr_32 = COPY %3
+    %0:sgpr_64 = COPY $sgpr0_sgpr1
+    %vreg123_1:vgpr_32 = COPY %11
+    %27:vreg_64 = REG_SEQUENCE %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1
+    %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 9, 0
+    %vreg123_2:vgpr_32 = COPY %4
+    %5:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 11, 0
+    %vreg123_3:vgpr_32 = COPY %5
+    %16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3
+
+    BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, implicit $exec
+    S_ENDPGM
+
+...