Index: lib/CodeGen/MachineCombiner.cpp =================================================================== --- lib/CodeGen/MachineCombiner.cpp +++ lib/CodeGen/MachineCombiner.cpp @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" @@ -41,6 +42,7 @@ namespace { class MachineCombiner : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -135,14 +137,16 @@ assert(TSchedModel.hasInstrSchedModelOrItineraries() && "Missing machine model\n"); + const TargetSubtargetInfo &STI = MF->getSubtarget(); + // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth + DEBUG(dbgs() << " Could be replaced with\n"); for (auto *InstrPtr : InsInstrs) { // for each Use unsigned IDepth = 0; - DEBUG(dbgs() << "NEW INSTR "; - InstrPtr->print(dbgs(), TII); - dbgs() << "\n";); + DEBUG(dbgs() << "\t" << STI.getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, TII);); for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) @@ -265,17 +269,20 @@ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; - dbgs() << " RootDepth: " << RootDepth << "\n"); + DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: " + << NewRootDepth << "\tRootDepth: " << RootDepth); // For a transform such as reassociation, the cost equation is // conservatively calculated so that we must improve the depth (data // dependency cycles) in the critical path to proceed with the transform. // Being conservative also protects against inaccuracies in the underlying // machine trace metrics and CPU models. - if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) { + DEBUG(dbgs() << "\tIt MustReduceDepth "); + DEBUG(NewRootDepth < RootDepth ? dbgs() << "\t and it does it\n" + : dbgs() << "\t but it does NOT do it\n"); return NewRootDepth < RootDepth; + } // A more flexible cost calculation for the critical path includes the slack // of the original code sequence. This may allow the transform to proceed @@ -296,17 +303,19 @@ unsigned RootSlack = BlockTrace.getInstrSlack(*Root); unsigned NewCycleCount = NewRootDepth + NewRootLatency; - unsigned OldCycleCount = RootDepth + RootLatency + - (SlackIsAccurate ? RootSlack : 0); - DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootLatency: " << RootLatency << "\n"; - dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate=" - << SlackIsAccurate << "\n"; - dbgs() << " NewRootDepth + NewRootLatency = " - << NewCycleCount << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack = " - << OldCycleCount << "\n"; - ); + unsigned OldCycleCount = + RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0); + DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency << "\tRootLatency: " + << RootLatency << "\n\tRootSlack: " << RootSlack + << " SlackIsAccurate=" << SlackIsAccurate + << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount + << "\n\tRootDepth + RootLatency + RootSlack = " + << OldCycleCount;); + DEBUG(NewCycleCount <= OldCycleCount + ? dbgs() << "\n\t It IMPROVES PathLen because" + : dbgs() << "\n\t It DOES NOT improve PathLen because"); + DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount << + ", OldCycleCount = " << OldCycleCount << "\n"); return NewCycleCount <= OldCycleCount; } @@ -352,9 +361,14 @@ unsigned ResLenAfterCombine = BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); - DEBUG(dbgs() << "RESOURCE DATA: \n"; - dbgs() << " resource len before: " << ResLenBeforeCombine - << " after: " << ResLenAfterCombine << "\n";); + DEBUG(dbgs() << "\t\tResource length before replacement: " + << ResLenBeforeCombine << " and after: " << ResLenAfterCombine + << "\n";); + DEBUG( + ResLenAfterCombine <= ResLenBeforeCombine + ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n" + : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource " + "Length\n"); return ResLenAfterCombine <= ResLenBeforeCombine; } @@ -419,6 +433,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { bool Changed = false; DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + const TargetSubtargetInfo &STI = MF->getSubtarget(); bool IncrementalUpdate = false; auto BlockIter = MBB->begin(); @@ -433,9 +448,8 @@ while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; - - DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); SmallVector Patterns; + // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -464,6 +478,10 @@ if (!TII->getMachineCombinerPatterns(MI, Patterns)) continue; + DEBUG(dbgs() << " Possible instr(s) to replace\n"); + DEBUG(dbgs() << "\t" << STI.getSchedInfoStr(MI) << ": "; + MI.print(dbgs(), false, false, TII);); + for (auto P : Patterns) { SmallVector InsInstrs; SmallVector DelInstrs; @@ -478,6 +496,17 @@ if (!NewInstCount) continue; + DEBUG(dbgs() << "\tDelInstrs\n"); + for (auto *InstrPtr : DelInstrs) { + DEBUG(dbgs() << "\t\t" << STI.getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, TII);); + } + DEBUG(dbgs() << "\tInsInstrs\n"); + for (auto *InstrPtr : InsInstrs) { + DEBUG(dbgs() << "\t\t" << STI.getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, TII);); + } + bool SubstituteAlways = false; if (ML && TII->isThroughputPattern(P)) SubstituteAlways = true; @@ -539,6 +568,7 @@ } bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; const TargetSubtargetInfo &STI = MF.getSubtarget(); TII = STI.getInstrInfo(); TRI = STI.getRegisterInfo();