Index: lib/CodeGen/MachineCombiner.cpp =================================================================== --- lib/CodeGen/MachineCombiner.cpp +++ lib/CodeGen/MachineCombiner.cpp @@ -41,6 +41,7 @@ namespace { class MachineCombiner : public MachineFunctionPass { + MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MCSchedModel SchedModel; @@ -134,15 +135,14 @@ SmallVector InstrDepth; assert(TSchedModel.hasInstrSchedModelOrItineraries() && "Missing machine model\n"); - +#ifndef NDEBUG + const TargetSubtargetInfo &STI = MF->getSubtarget(); +#endif // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth for (auto *InstrPtr : InsInstrs) { // for each Use unsigned IDepth = 0; - DEBUG(dbgs() << "NEW INSTR "; - InstrPtr->print(dbgs(), TII); - dbgs() << "\n";); for (const MachineOperand &MO : InstrPtr->operands()) { // Check for virtual register operand. if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) @@ -265,17 +265,20 @@ unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << *Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; - dbgs() << " RootDepth: " << RootDepth << "\n"); + DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: " + << NewRootDepth << "\tRootDepth: " << RootDepth); // For a transform such as reassociation, the cost equation is // conservatively calculated so that we must improve the depth (data // dependency cycles) in the critical path to proceed with the transform. // Being conservative also protects against inaccuracies in the underlying // machine trace metrics and CPU models. - if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) { + DEBUG(dbgs() << "\tIt MustReduceDepth "); + DEBUG(NewRootDepth < RootDepth ? dbgs() << "\t and it does it\n" + : dbgs() << "\t but it does NOT do it\n"); return NewRootDepth < RootDepth; + } // A more flexible cost calculation for the critical path includes the slack // of the original code sequence. This may allow the transform to proceed @@ -296,17 +299,19 @@ unsigned RootSlack = BlockTrace.getInstrSlack(*Root); unsigned NewCycleCount = NewRootDepth + NewRootLatency; - unsigned OldCycleCount = RootDepth + RootLatency + - (SlackIsAccurate ? RootSlack : 0); - DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootLatency: " << RootLatency << "\n"; - dbgs() << " RootSlack: " << RootSlack << " SlackIsAccurate=" - << SlackIsAccurate << "\n"; - dbgs() << " NewRootDepth + NewRootLatency = " - << NewCycleCount << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack = " - << OldCycleCount << "\n"; - ); + unsigned OldCycleCount = + RootDepth + RootLatency + (SlackIsAccurate ? RootSlack : 0); + DEBUG(dbgs() << "\n\tNewRootLatency: " << NewRootLatency << "\tRootLatency: " + << RootLatency << "\n\tRootSlack: " << RootSlack + << " SlackIsAccurate=" << SlackIsAccurate + << "\n\tNewRootDepth + NewRootLatency = " << NewCycleCount + << "\n\tRootDepth + RootLatency + RootSlack = " + << OldCycleCount;); + DEBUG(NewCycleCount <= OldCycleCount + ? dbgs() << "\n\t It IMPROVES PathLen because" + : dbgs() << "\n\t It DOES NOT improve PathLen because"); + DEBUG(dbgs() << "\n\t\tNewCycleCount = " << NewCycleCount << + ", OldCycleCount = " << OldCycleCount << "\n"); return NewCycleCount <= OldCycleCount; } @@ -352,9 +357,14 @@ unsigned ResLenAfterCombine = BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); - DEBUG(dbgs() << "RESOURCE DATA: \n"; - dbgs() << " resource len before: " << ResLenBeforeCombine - << " after: " << ResLenAfterCombine << "\n";); + DEBUG(dbgs() << "\t\tResource length before replacement: " + << ResLenBeforeCombine << " and after: " << ResLenAfterCombine + << "\n";); + DEBUG( + ResLenAfterCombine <= ResLenBeforeCombine + ? dbgs() << "\t\t As result it IMPROVES/PRESERVES Resource Length\n" + : dbgs() << "\t\t As result it DOES NOT improve/preserve Resource " + "Length\n"); return ResLenAfterCombine <= ResLenBeforeCombine; } @@ -419,7 +429,9 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { bool Changed = false; DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); - +#ifndef NDEBUG + const TargetSubtargetInfo &STI = MF->getSubtarget(); +#endif bool IncrementalUpdate = false; auto BlockIter = MBB->begin(); decltype(BlockIter) LastUpdate; @@ -433,9 +445,8 @@ while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; - - DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); SmallVector Patterns; + // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -464,6 +475,10 @@ if (!TII->getMachineCombinerPatterns(MI, Patterns)) continue; + DEBUG(dbgs() << " Possible instr(s) to replace\n"); + DEBUG(dbgs() << "\t" << STI.getSchedInfoStr(MI) << ": "; + MI.print(dbgs(), false, false, TII);); + for (auto P : Patterns) { SmallVector InsInstrs; SmallVector DelInstrs; @@ -478,6 +493,17 @@ if (!NewInstCount) continue; + DEBUG(dbgs() << "\tThese instructions could be removed\n"); + for (auto const *InstrPtr : DelInstrs) { + DEBUG(dbgs() << "\t\t" << STI.getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, TII);); + } + DEBUG(dbgs() << "\tThese instructions could replace the removed ones\n"); + for (auto const *InstrPtr : InsInstrs) { + DEBUG(dbgs() << "\t\t" << STI.getSchedInfoStr(*InstrPtr) << ": "; + InstrPtr->print(dbgs(), false, false, TII);); + } + bool SubstituteAlways = false; if (ML && TII->isThroughputPattern(P)) SubstituteAlways = true; @@ -538,19 +564,20 @@ return Changed; } -bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { - const TargetSubtargetInfo &STI = MF.getSubtarget(); +bool MachineCombiner::runOnMachineFunction(MachineFunction &_MF) { + MF = &_MF; + const TargetSubtargetInfo &STI = MF->getSubtarget(); TII = STI.getInstrInfo(); TRI = STI.getRegisterInfo(); SchedModel = STI.getSchedModel(); TSchedModel.init(SchedModel, &STI, TII); - MRI = &MF.getRegInfo(); + MRI = &MF->getRegInfo(); MLI = &getAnalysis(); Traces = &getAnalysis(); MinInstr = nullptr; - OptSize = MF.getFunction().optForSize(); + OptSize = MF->getFunction().optForSize(); - DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + DEBUG(dbgs() << getPassName() << ": " << MF->getName() << '\n'); if (!TII->useMachineCombiner()) { DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); return false; @@ -559,7 +586,7 @@ bool Changed = false; // Try to combine instructions. - for (auto &MBB : MF) + for (auto &MBB : _MF) Changed |= combineInstructions(&MBB); return Changed;