Index: llvm/lib/Target/AArch64/AArch64MacroFusion.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -33,8 +33,8 @@ /// should be scheduled back to back. Given an anchor instruction, if the other /// instruction is unspecified, then verify that the anchor instruction may be /// part of a pair at all. -static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII, - const AArch64Subtarget &ST, +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, const MachineInstr *First, const MachineInstr *Second) { assert((First || Second) && "At least one instr must be specified"); @@ -44,6 +44,8 @@ unsigned SecondOpcode = Second ? Second->getOpcode() : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END); + const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo &>(TII); + const AArch64Subtarget &ST = static_cast<const AArch64Subtarget &>(TSI); if (ST.hasArithmeticBccFusion()) // Fuse CMN, CMP, TST followed by Bcc. @@ -75,7 +77,7 @@ case AArch64::BICSWrs: case AArch64::BICSXrs: // Shift value can be 0 making these behave like the "rr" variant... - return !TII.hasShiftedReg(*First); + return !II.hasShiftedReg(*First); case AArch64::INSTRUCTION_LIST_END: return true; } @@ -117,7 +119,7 @@ case AArch64::BICWrs: case AArch64::BICXrs: // Shift value can be 0 making these behave like the "rr" variant... - return !TII.hasShiftedReg(*First); + return !II.hasShiftedReg(*First); case AArch64::INSTRUCTION_LIST_END: return true; } @@ -164,64 +166,61 @@ } /// \brief Implement the fusion of instruction pairs in the scheduling -/// DAG, anchored at the instruction in ASU. Preds -/// indicates if its dependencies in \param APreds are predecessors instead of -/// successors. 
-static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit *ASU, - SmallVectorImpl<SDep> &APreds, bool Preds) { - const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(DAG->TII); - const AArch64Subtarget &ST = DAG->MF.getSubtarget<AArch64Subtarget>(); - - const MachineInstr *AMI = ASU->getInstr(); - if (!AMI || AMI->isPseudo() || AMI->isTransient() || - (Preds && !shouldScheduleAdjacent(*TII, ST, nullptr, AMI)) || - (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, nullptr))) +/// DAG, anchored at the instruction in ASU. Preds indicates if its dependencies +/// in ADeps are predecessors instead of successors. If specified, XSU adds an +/// exception to ADeps. +static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &ASU, SUnit *XSU, + SmallVectorImpl<SDep> &ADeps, bool Preds) { + const MachineInstr *AMI = ASU.getInstr(); + if (!AMI || AMI->isPseudo() || AMI->isTransient()) return false; - for (SDep &BDep : APreds) { + const MachineInstr *LMI = Preds ? nullptr : AMI; + const MachineInstr *RMI = Preds ? AMI : nullptr; + if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), LMI, RMI)) + return false; + + for (SDep &BDep : ADeps) { if (BDep.isWeak()) continue; - SUnit *BSU = BDep.getSUnit(); - const MachineInstr *BMI = BSU->getInstr(); - if (!BMI || BMI->isPseudo() || BMI->isTransient() || - (Preds && !shouldScheduleAdjacent(*TII, ST, BMI, AMI)) || - (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, BMI))) + SUnit &BSU = *BDep.getSUnit(); + if (XSU && XSU == &BSU) + continue; + + const MachineInstr *BMI = BSU.getInstr(); + if (!BMI || BMI->isPseudo() || BMI->isTransient()) + continue; + + LMI = Preds ? BMI : AMI; + RMI = Preds ? AMI : BMI; + if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(), LMI, RMI)) continue; // Create a single weak edge between the adjacent instrs. The only // effect is to cause bottom-up scheduling to heavily prioritize the // clustered instrs. 
- if (Preds) - DAG->addEdge(ASU, SDep(BSU, SDep::Cluster)); - else - DAG->addEdge(BSU, SDep(ASU, SDep::Cluster)); + SUnit &LSU = Preds ? BSU : ASU; + SUnit &RSU = Preds ? ASU : BSU; + DAG->addEdge(&RSU, SDep(&LSU, SDep::Cluster)); // Adjust the latency between the 1st instr and its predecessors/successors. - for (SDep &Dep : APreds) - if (Dep.getSUnit() == BSU) + for (SDep &Dep : ADeps) + if (Dep.getSUnit() == &BSU) Dep.setLatency(0); // Adjust the latency between the 2nd instr and its successors/predecessors. - auto &BSuccs = Preds ? BSU->Succs : BSU->Preds; - for (SDep &Dep : BSuccs) - if (Dep.getSUnit() == ASU) + auto &BDeps = Preds ? BSU.Succs : BSU.Preds; + for (SDep &Dep : BDeps) + if (Dep.getSUnit() == &ASU) Dep.setLatency(0); ++NumFused; - DEBUG({ SUnit *LSU = Preds ? BSU : ASU; - SUnit *RSU = Preds ? ASU : BSU; - const MachineInstr *LMI = Preds ? BMI : AMI; - const MachineInstr *RMI = Preds ? AMI : BMI; - - dbgs() << DAG->MF.getName() << "(): Macro fuse "; - LSU->print(dbgs(), DAG); - dbgs() << " - "; - RSU->print(dbgs(), DAG); - dbgs() << " / " << - TII->getName(LMI->getOpcode()) << " - " << - TII->getName(RMI->getOpcode()) << '\n'; - }); + DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse "; + LSU.print(dbgs(), DAG); dbgs() << " - "; + RSU.print(dbgs(), DAG); dbgs() << " / "; + dbgs() << DAG->TII->getName(LMI->getOpcode()) << " - " << + DAG->TII->getName(RMI->getOpcode()) << '\n'; ); return true; } @@ -244,10 +243,10 @@ // For each of the SUnits in the scheduling block, try to fuse the instruction // in it with one in its successors. for (SUnit &ASU : DAG->SUnits) - scheduleAdjacentImpl(DAG, &ASU, ASU.Succs, false); + scheduleAdjacentImpl(DAG, ASU, &DAG->ExitSU, ASU.Succs, false); // Try to fuse the instruction in the ExitSU with one in its predecessors. - scheduleAdjacentImpl(DAG, &DAG->ExitSU, DAG->ExitSU.Preds, true); + scheduleAdjacentImpl(DAG, DAG->ExitSU, nullptr, DAG->ExitSU.Preds, true); } } // end namespace