Index: lib/CodeGen/MIRCanonicalizerPass.cpp
===================================================================
--- lib/CodeGen/MIRCanonicalizerPass.cpp
+++ lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -131,7 +131,58 @@
   return ~0U;
 }
 
-static bool rescheduleCanonically(MachineBasicBlock *MBB) {
+/// Moves every instruction in \p instructions to the position returned by
+/// \p getPos, visiting them in ascending order of their printed text (taken
+/// from the first '=' onwards, or the whole text when there is no '=').
+///
+/// Each instruction is spliced right before \p getPos() in turn, so a caller
+/// that always returns MBB->begin() ends up with the instructions in
+/// descending textual order at the top of the block.
+///
+/// \returns true if at least one instruction was moved.
+static bool
+rescheduleLexographically(const std::vector<MachineInstr *> &instructions,
+                          MachineBasicBlock *MBB,
+                          std::function<MachineBasicBlock::iterator()> getPos) {
+
+  bool Changed = false;
+  // A multimap keeps instructions whose printed text is identical (e.g. two
+  // materializations of the same immediate). A unique-key map would drop all
+  // but the first of them, leaving fewer instructions hoisted than the caller
+  // counts and later renumbers.
+  std::multimap<std::string, MachineInstr *> StringInstrMap;
+
+  for (auto *II : instructions) {
+    std::string S;
+    raw_string_ostream OS(S);
+    II->print(OS);
+    OS.flush();
+
+    // Trim the assignment, or start from the beginning in the case of a store.
+    const size_t i = S.find("=");
+    StringInstrMap.insert({(i == std::string::npos) ? S : S.substr(i), II});
+  }
+
+  for (auto &II : StringInstrMap) {
+
+    DEBUG({
+      dbgs() << "Splicing ";
+      II.second->dump();
+      dbgs() << " right before: ";
+      getPos()->dump();
+    });
+
+    Changed = true;
+    MBB->splice(getPos(), MBB, II.second);
+  }
+
+  return Changed;
+}
+
+/// \p PseudoIdempotentInstCount is set to the number of pseudo-idempotent
+/// instructions that were collected and hoisted to the top of \p MBB.
+static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
+                                  MachineBasicBlock *MBB) {
 
   bool Changed = false;
 
@@ -153,13 +204,64 @@
     Instructions.push_back(&MI);
   }
 
+  std::vector<MachineInstr *> PseudoIdempotentInstructions;
+  // Physical registers that are defined anywhere in this block. An instruction
+  // reading one of them depends on that definition and must not be hoisted.
+  std::vector<unsigned> PhysRegDefs;
+  for (auto *II : Instructions) {
+    // Start at operand 0: explicit physical-register defs such as
+    // "$w0 = COPY %42" are found there.
+    for (unsigned i = 0; i < II->getNumOperands(); i++) {
+      MachineOperand &MO = II->getOperand(i);
+      if (!MO.isReg())
+        continue;
+
+      if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;
+
+      if (!MO.isDef())
+        continue;
+
+      PhysRegDefs.push_back(MO.getReg());
+    }
+  }
+
   for (auto *II : Instructions) {
     if (II->getNumOperands() == 0)
       continue;
+    // Loads and stores are never treated as pseudo-idempotent.
+    if (II->mayLoad() || II->mayStore())
+      continue;
 
     MachineOperand &MO = II->getOperand(0);
     if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
       continue;
+    if (!MO.isDef())
+      continue;
+
+    // An instruction is pseudo-idempotent when none of its remaining operands
+    // is a virtual register or a physical register defined in this block, i.e.
+    // it depends on nothing computed inside the block.
+    bool IsPseudoIdempotent = true;
+    for (unsigned i = 1; i < II->getNumOperands(); i++) {
+      const MachineOperand &Op = II->getOperand(i);
+      if (!Op.isReg())
+        continue;
+
+      const bool IsPhysRegNotDefinedHere =
+          !TargetRegisterInfo::isVirtualRegister(Op.getReg()) &&
+          llvm::find(PhysRegDefs, Op.getReg()) == PhysRegDefs.end();
+      if (IsPhysRegNotDefinedHere)
+        continue;
+
+      IsPseudoIdempotent = false;
+      break;
+    }
+
+    if (IsPseudoIdempotent) {
+      PseudoIdempotentInstructions.push_back(II);
+      continue;
+    }
 
     DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
 
@@ -194,9 +296,6 @@
       if (DefI != BBE && UseI != BBE)
         break;
 
-      if ((&*BBI != Def) && (&*BBI != UseToBringDefCloserTo))
-        continue;
-
       if (&*BBI == Def) {
         DefI = BBI;
         continue;
@@ -222,6 +321,12 @@
     MBB->splice(UseI, MBB, DefI);
   }
 
+  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
+  DEBUG(dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
+  Changed |= rescheduleLexographically(
+      PseudoIdempotentInstructions, MBB,
+      [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
+
   return Changed;
 }
 
@@ -517,7 +622,8 @@
   DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
 
   DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
-  Changed |= rescheduleCanonically(MBB);
+  unsigned PseudoIdempotentInstCount = 0;
+  Changed |= rescheduleCanonically(PseudoIdempotentInstCount, MBB);
   DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
 
   std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
@@ -579,6 +685,40 @@
 
   auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, DummyRC);
   Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+
+  // Here we renumber the def vregs for the idempotent instructions from the top
+  // of the MachineBasicBlock so that they are named in the order that we sorted
+  // them alphabetically. If there are fewer than two idempotent instructions
+  // we don't bother with this step because it doesn't seem to have a lot of
+  // value in real world cases we've tried.
+  if (PseudoIdempotentInstCount >= 2) {
+    Changed = true;
+    unsigned i = 0;
+    unsigned gap = 1;
+    SkipVRegs(gap, MRI, DummyRC);
+
+    for (auto &MI : *MBB) {
+
+      if (i++ >= PseudoIdempotentInstCount) {
+        break;
+      }
+
+      unsigned vRegToRename = MI.getOperand(0).getReg();
+      auto Rename = MRI.createVirtualRegister(MRI.getRegClass(vRegToRename));
+
+      // Collect the operands before rewriting them. NOTE(review): presumably
+      // setReg() would otherwise disturb the reg_operands() walk - confirm.
+      std::vector<MachineOperand *> RenameMOs;
+      for (auto &MO : MRI.reg_operands(vRegToRename)) {
+        RenameMOs.push_back(&MO);
+      }
+
+      for (auto *MO : RenameMOs) {
+        MO->setReg(Rename);
+      }
+    }
+  }
+
   Changed |= doDefKillClear(MBB);
 
   DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); dbgs() << "\n";);
@@ -622,4 +762,3 @@
 
   return Changed;
 }
-
Index: test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir
===================================================================
--- /dev/null
+++ test/CodeGen/MIR/AArch64/mirCanonIdempotent.mir
@@ -0,0 +1,117 @@
+# RUN: llc -mtriple=arm64-apple-ios11.0.0 -o - -run-pass mir-canonicalizer %s | FileCheck %s
+# These Idempotent instructions are sorted alphabetically (based on after the '=')
+# CHECK: %4353:gpr64 = MOVi64imm 4617315517961601024
+# CHECK: %4354:gpr32 = MOVi32imm 408
+# CHECK: %4355:gpr32 = MOVi32imm 408
+# CHECK: %4356:gpr64all = IMPLICIT_DEF
+# CHECK: %4357:fpr64 = FMOVDi 20
+# CHECK: %4358:fpr64 = FMOVDi 112
+...
+---
+name:            Proc8 # input function for the mir-canonicalizer run in the RUN line
+stack:
+  - { id: 0, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -4, di-variable: '', di-expression: '', di-location: '' }
+  - { id: 1, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -16, di-variable: '', di-expression: '', di-location: '' }
+  - { id: 2, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -24, di-variable: '', di-expression: '', di-location: '' }
+  - { id: 3, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -32, di-variable: '', di-expression: '', di-location: '' }
+  - { id: 4, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -40, di-variable: '', di-expression: '', di-location: '' }
+  - { id: 5, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -48, di-variable: '', di-expression: '', di-location: '' }
+  - { id: 6, type: default, offset: 0, size: 8, alignment: 8,
+      stack-id: 0, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -56, di-variable: '', di-expression: '', di-location: '' }
+constants:
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $d0, $d1
+
+    %3:fpr64 = COPY $d1 ; reads a live-in physical register
+    %2:fpr64 = COPY $d0 ; reads a live-in physical register
+    %1:gpr64 = COPY $x1 ; reads a live-in physical register
+    %0:gpr64common = COPY $x0 ; reads a live-in physical register
+    STRXui %0, %stack.1, 0 :: (store 8)
+    STRXui %1, %stack.2, 0 :: (store 8)
+    STRDui %2, %stack.3, 0 :: (store 8)
+    STRDui %3, %stack.4, 0 :: (store 8)
+
+    %4:fpr64 = FMOVDi 20 ; immediate operand only, no register inputs
+    %5:fpr64 = FADDDrr %2, killed %4
+    STRDui %5, %stack.5, 0 :: (store 8)
+
+    %6:gpr32 = FCVTZSUWDr %5
+    STRDroW %3, %0, killed %6, 1, 1
+
+    %7:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
+    %8:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
+
+    %9:gpr32common = FCVTZSUWDr killed %8
+    %10:fpr64 = LDRDroW %7, %9, 1, 1
+
+    %11:gpr32common = ADDWri %9, 1, 0
+    STRDroW killed %10, %7, killed %11, 1, 1
+
+    %12:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
+    %13:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
+
+    %14:gpr32common = FCVTZSUWDr %12
+    %15:gpr32common = ADDWri killed %14, 30, 0
+    STRDroW %12, killed %13, killed %15, 1, 1
+
+    %16:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
+    STRDui killed %16, %stack.6, 0 :: (store 8)
+
+    %19:fpr64 = FMOVDi 112 ; immediate operand only, no register inputs
+    %46:gpr32 = MOVi32imm 408 ; immediate operand only, no register inputs
+    %43:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
+    %44:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
+
+    %45:gpr32 = FCVTZSUWDr %43
+    %47:gpr64common = SMADDLrrr killed %45, %46, killed %44
+    %48:fpr64 = LDRDui %stack.6, 0 :: (dereferenceable load 8)
+
+    %49:gpr32 = FCVTZSUWDr killed %48
+    STRDroW %43, killed %47, killed %49, 1, 1
+
+    %21:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
+    %22:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
+
+    %23:gpr32 = FCVTZSUWDr killed %22
+    %24:gpr32 = MOVi32imm 408 ; textually identical to %46 after the '='
+    %25:gpr64common = SMADDLrrr %23, %24, killed %21
+    %26:gpr64sp = ADDXrx killed %25, %23, 51
+    %27:fpr64 = LDURDi %26, -8
+    %29:fpr64 = FADDDrr killed %27, %19
+    STURDi killed %29, %26, -8
+
+    %30:gpr64common = LDRXui %stack.1, 0 :: (dereferenceable load 8)
+    %31:fpr64 = LDRDui %stack.5, 0 :: (dereferenceable load 8)
+
+    %32:gpr32common = FCVTZSUWDr killed %31
+    %34:gpr64all = IMPLICIT_DEF ; no operands besides the def
+    %33:gpr64 = INSERT_SUBREG %34, %32, %subreg.sub_32
+    %35:gpr64 = SBFMXri killed %33, 61, 31
+    %36:fpr64 = LDRDroX killed %30, %35, 0, 0
+    %37:gpr64 = LDRXui %stack.2, 0 :: (dereferenceable load 8)
+
+    %38:gpr32common = ADDWri %32, 20, 0
+    %39:gpr64common = SMADDLrrr killed %38, %24, killed %37
+    STRDroX killed %36, killed %39, %35, 0, 0
+
+    %40:gpr64 = MOVi64imm 4617315517961601024 ; immediate operand only, no register inputs
+
+    %42:gpr32 = LDRWui %stack.0, 0 :: (dereferenceable load 8)
+    $w0 = COPY %42 ; physical-register def in operand 0
+    RET_ReallyLR implicit $w0
+
+...