Index: include/llvm/CodeGen/MachineReassociation.h =================================================================== --- /dev/null +++ include/llvm/CodeGen/MachineReassociation.h @@ -0,0 +1,38 @@ +//=== MachineReassociation.h - Inst reassociation pattern -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines instruction pattern supported by reassociation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEREASSOCIATION_H +#define LLVM_CODEGEN_MACHINEREASSOCIATION_H + +namespace llvm { + +/// Enumeration of instruction pattern supported by machine reassociation +/// +/// +namespace MachineReassociationPattern { +// Forward declaration +enum MR_PATTERN : int{ + // These are commutative variants for reassociating a computation chain + // of the form: + // B = A op X (Prev) + // C = B op Y (Root) + M_REASSOC_AX_BY = 0, + M_REASSOC_AX_YB = 1, + M_REASSOC_XA_BY = 2, + M_REASSOC_XA_YB = 3, +}; +} // end namespace MachineReassociationPattern +} // end namespace llvm + +#endif + Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -517,6 +517,10 @@ /// critical-path and resource depth. extern char &MachineCombinerID; + /// This pass reassociate instructions using trace metrics to estimate + /// critical-path and resource depth. + extern char &MachineReassociationID; + /// StackSlotColoring - This pass performs stack coloring and merging. /// It merges disjoint allocas to reduce the stack size. 
extern char &StackColoringID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -289,6 +289,7 @@ void initializeMIRPrintingPassPass(PassRegistry&); void initializeStackMapLivenessPass(PassRegistry&); void initializeMachineCombinerPass(PassRegistry &); +void initializeMachineReassociationPass(PassRegistry &); void initializeLoadCombinePass(PassRegistry&); void initializeRewriteSymbolsPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineCombinerPattern.h" +#include "llvm/CodeGen/MachineReassociation.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" @@ -731,6 +732,26 @@ return false; } + /// Attempt to reassociate the instructions to reduce critical path length. + virtual void reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { + return; + } + + /// Return true when \P Inst is both associative and commutative + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { + return false; + } + + /// Return true when \P Inst has reassociable operands in the same \P MBB + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { + return false; + } + /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence. 
The client /// has to decide whether the actual replacement is beneficial or not. @@ -752,6 +773,9 @@ /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + /// Return true when a target supports MachineReassociation. + virtual bool useMachineReassociation() const { return false; } + protected: /// Target-dependent implementation for foldMemoryOperand. /// Target-independent code in foldMemoryOperand will Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -57,6 +57,7 @@ MachineBranchProbabilityInfo.cpp MachineCSE.cpp MachineCombiner.cpp + MachineReassociation.cpp MachineCopyPropagation.cpp MachineDominators.cpp MachineDominanceFrontier.cpp Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -44,6 +44,7 @@ initializeMachineCSEPass(Registry); initializeImplicitNullChecksPass(Registry); initializeMachineCombinerPass(Registry); + initializeMachineReassociationPass(Registry); initializeMachineCopyPropagationPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineFunctionPrinterPassPass(Registry); Index: lib/CodeGen/MachineReassociation.cpp =================================================================== --- /dev/null +++ lib/CodeGen/MachineReassociation.cpp @@ -0,0 +1,562 @@ +//=== MachineReassociation.cpp - Reassociate Inst on SSA form machine code ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The machine reassociation pass uses machine trace metrics to ensure the new +// instructions do not lengthen the critical path or the resource depth. 
+//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "machine-reassociation" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +STATISTIC(NumInstReassociated, "Number of machineinst reassociated"); + +namespace { +class MachineReassociation : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MCSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + + TargetSchedModel TSchedModel; + + /// True if optimizing for code size. 
+ bool OptSize; + +public: + static char ID; + MachineReassociation() : MachineFunctionPass(ID) { + initializeMachineReassociationPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { + return "Machine Reassociate Inst"; + } + +private: + bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool reassociateInstructions(MachineBasicBlock *); + MachineInstr *getOperandDef(const MachineOperand &MO); + unsigned getDepth(SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace); + unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace); + bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + bool NewCodeHasLessInsts); + bool preservesResourceLen(MachineBasicBlock *MBB, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs); + void instr2instrSC(SmallVectorImpl &Instrs, + SmallVectorImpl &InstrsSC); + + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted); + + bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted); + + bool getMachineReassociationPatterns( + MachineInstr &Root, + SmallVectorImpl &Pattern); + + void + genAlternativeCodeSequence(MachineInstr &Root, + MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg); +}; +} + +char MachineReassociation::ID = 0; +char &llvm::MachineReassociationID = MachineReassociation::ID; + +INITIALIZE_PASS_BEGIN(MachineReassociation, "machine-reassociate", + "Machine InstReassociate", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(MachineReassociation, 
"machine-reassociate", + "Machine InstReassociate", false, false) + +void MachineReassociation::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *MachineReassociation::getOperandDef(const MachineOperand &MO) { + MachineInstr *DefInstr = nullptr; + // We need a virtual register definition. + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + DefInstr = MRI->getUniqueVRegDef(MO.getReg()); + // PHI's have no depth etc. + if (DefInstr && DefInstr->isPHI()) + DefInstr = nullptr; + return DefInstr; +} + +/// Computes depth of instructions in vector \InsInstr. +/// +/// \param InsInstrs is a vector of machine instructions +/// \param InstrIdxForVirtReg is a dense map of virtual register to index +/// of defining machine instruction in \p InsInstrs +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Depth of last instruction in \InsInstrs ("NewRoot") +unsigned +MachineReassociation::getDepth(SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace) { + + SmallVector InstrDepth; + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + + // For each instruction in the new sequence compute the depth based on the + // operands. Use the trace information when possible. For new operands which + // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth + for (auto *InstrPtr : InsInstrs) { // for each Use + unsigned IDepth = 0; + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + for (const MachineOperand &MO : InstrPtr->operands()) { + // Check for virtual register operand. 
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isUse()) + continue; + unsigned DepthOp = 0; + unsigned LatencyOp = 0; + DenseMap::iterator II = + InstrIdxForVirtReg.find(MO.getReg()); + if (II != InstrIdxForVirtReg.end()) { + // Operand is new virtual register not in trace + assert(II->second < InstrDepth.size() && "Bad Index"); + MachineInstr *DefInstr = InsInstrs[II->second]; + assert(DefInstr && + "There must be a definition for a new virtual register"); + DepthOp = InstrDepth[II->second]; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } else { + MachineInstr *DefInstr = getOperandDef(MO); + if (DefInstr) { + DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } + } + IDepth = std::max(IDepth, DepthOp + LatencyOp); + } + InstrDepth.push_back(IDepth); + } + unsigned NewRootIdx = InsInstrs.size() - 1; + return InstrDepth[NewRootIdx]; +} + +/// Computes instruction latency as max of latency of defined operands. +/// +/// \param Root is a machine instruction that could be replaced by NewRoot. 
+/// It is used to compute a more accurate latency information for NewRoot in +/// case there is a dependent instruction in the same trace (\p BlockTrace) +/// \param NewRoot is the instruction for which the latency is computed +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Latency of \p NewRoot +unsigned +MachineReassociation::getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace) { + + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + + // Check each definition in NewRoot and compute the latency + unsigned NewRootLatency = 0; + + for (const MachineOperand &MO : NewRoot->operands()) { + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isDef()) + continue; + // Get the first instruction that uses MO + MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); + RI++; + MachineInstr *UseMO = RI->getParent(); + unsigned LatencyOp = 0; + if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { + LatencyOp = TSchedModel.computeOperandLatency( + NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, + UseMO->findRegisterUseOperandIdx(MO.getReg())); + } else { + LatencyOp = TSchedModel.computeInstrLatency(NewRoot); + } + NewRootLatency = std::max(NewRootLatency, LatencyOp); + } + return NewRootLatency; +} + +/// True when the new instruction sequence does not lengthen the critical path +/// and the new sequence has less instructions or the new sequence improves the +/// critical path. +/// The DAGCombine code sequence ends in MI (Machine Instruction) Root. +/// The new code sequence ends in MI NewRoot. A necessary condition for the new +/// sequence to replace the old sequence is that it cannot lengthen the critical +/// path. This is decided by the formula: +/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). 
+/// If the new sequence has an equal length critical path but does not reduce +/// the number of instructions (NewCodeHasLessInsts is false), then it is not +/// considered an improvement. The slack is the number of cycles Root can be +/// delayed before the critical path becomes longer. +bool MachineReassociation::improvesCriticalPathLen( + MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + bool NewCodeHasLessInsts) { + + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + // NewRoot is the last instruction in the \p InsInstrs vector. + // Get depth and latency of NewRoot. + unsigned NewRootIdx = InsInstrs.size() - 1; + MachineInstr *NewRoot = InsInstrs[NewRootIdx]; + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); + unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); + + // Get depth, latency and slack of Root. 
+ unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootSlack = BlockTrace.getInstrSlack(Root); + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth + << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency + << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency " + << NewRootDepth + NewRootLatency << "\n"; + dbgs() << " RootDepth + RootLatency + RootSlack " + << RootDepth + RootLatency + RootSlack << "\n";); + + unsigned NewCycleCount = NewRootDepth + NewRootLatency; + unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; + + if (NewCodeHasLessInsts) + return NewCycleCount <= OldCycleCount; + else + return NewCycleCount < OldCycleCount; +} + +/// helper routine to convert instructions into SC +void MachineReassociation::instr2instrSC( + SmallVectorImpl &Instrs, + SmallVectorImpl &InstrsSC) { + for (auto *InstrPtr : Instrs) { + unsigned Opc = InstrPtr->getOpcode(); + unsigned Idx = TII->get(Opc).getSchedClass(); + const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx); + InstrsSC.push_back(SC); + } +} +/// True when the new instructions do not increase resource length +bool MachineReassociation::preservesResourceLen( + MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs) { + if (!TSchedModel.hasInstrSchedModel()) + return true; + + // Compute current resource length + + // ArrayRef MBBarr(MBB); + SmallVector MBBarr; + MBBarr.push_back(MBB); + unsigned ResLenBeforeReassociate = BlockTrace.getResourceLength(MBBarr); + + // Deal with SC rather than Instructions. 
+ SmallVector InsInstrsSC; + SmallVector DelInstrsSC; + + instr2instrSC(InsInstrs, InsInstrsSC); + instr2instrSC(DelInstrs, DelInstrsSC); + + ArrayRef MSCInsArr = makeArrayRef(InsInstrsSC); + ArrayRef MSCDelArr = makeArrayRef(DelInstrsSC); + + // Compute new resource length. + unsigned ResLenAfterReassociate = + BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); + + DEBUG(dbgs() << "RESOURCE DATA: \n"; + dbgs() << " resource len before: " << ResLenBeforeReassociate + << " after: " << ResLenAfterReassociate << "\n";); + + return ResLenAfterReassociate <= ResLenBeforeReassociate; +} + +/// \returns true when new instruction sequence should be generated +/// independent if it lengthens critical path or not +bool MachineReassociation::doSubstitute(unsigned NewSize, unsigned OldSize) { + if (OptSize && (NewSize < OldSize)) + return true; + if (!TSchedModel.hasInstrSchedModelOrItineraries()) + return true; + return false; +} + +bool MachineReassociation::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. 
+ if (MI1->getOpcode() == AssocOpcode && + TII->hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) + return true; + + return false; +} + +/// Return true if the input instruction is part of a chain of dependent ops +/// that are suitable for reassociation, otherwise return false. +/// If the instruction's operands must be commuted to have a previous +/// instruction of the same type define the first source operand, Commuted will +/// be set to true. +bool MachineReassociation::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) { + // 1. The operation must be associative and commutative. + // 2. The instruction must have virtual register definitions for its + // operands in the same basic block. + // 3. The instruction must have a reassociable sibling. + if (TII->isAssociativeAndCommutative(Inst) && + TII->hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted)) + return true; + + return false; +} + +/// Return true when there is potentially a faster code sequence +/// for an instruction chain ending in \p Root. All potential patterns are +/// returned in the \p Pattern vector. Pattern should be sorted in priority +/// order since the pattern evaluator stops checking as soon as it finds a +/// faster sequence. + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. 
+bool MachineReassociation::getMachineReassociationPatterns( + MachineInstr &Root, + SmallVectorImpl &Patterns) { + // Look for this reassociation pattern: + // B = A op X (Prev) + // C = B op Y (Root) + + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine reassociation pass decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineReassociationPattern::M_REASSOC_AX_YB); + Patterns.push_back(MachineReassociationPattern::M_REASSOC_XA_YB); + } else { + Patterns.push_back(MachineReassociationPattern::M_REASSOC_AX_BY); + Patterns.push_back(MachineReassociationPattern::M_REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// When getMachineReassociationPatterns() finds patterns, this function +/// generates the instructions that could replace the original code sequence. +/// The client has to decide whether the actual replacement is beneficial or +/// not. +void MachineReassociation::genAlternativeCodeSequence( + MachineInstr &Root, MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstIdxForVirtReg) { + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. 
+ MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineReassociationPattern::M_REASSOC_AX_BY: + case MachineReassociationPattern::M_REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineReassociationPattern::M_REASSOC_AX_YB: + case MachineReassociationPattern::M_REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + } + assert(Prev && "Unknown pattern for machine reassociation"); + + TII->reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, + InstIdxForVirtReg); + return; +} + +/// Substitute a slow code sequence with a faster one by +/// evaluating instruction reassociation pattern. +/// Performs instruction +/// reassociation based on machine trace metrics. Only reassociate a sequence of +/// instructions when this neither lengthens the critical path nor increases +/// resource pressure. When optimizing for codesize always reassociate when the +/// new sequence is shorter. +bool MachineReassociation::reassociateInstructions(MachineBasicBlock *MBB) { + bool Changed = false; + DEBUG(dbgs() << "Reassociate MBB " << MBB->getName() << "\n"); + + auto BlockIter = MBB->begin(); + + while (BlockIter != MBB->end()) { + auto &MI = *BlockIter++; + + DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); + SmallVector Patterns; + // For each instruction we check if it can be the root of a reassociate + // pattern. Then for each pattern the new code sequence in form of MI is + // generated and evaluated. When the efficiency criteria (don't lengthen + // critical path, don't use more resources) is met the new sequence gets + // hooked up into the basic block before the old sequence is removed. + // + // The algorithm does not try to evaluate all patterns and pick the best. + // This is only an artificial restriction though. In practice there is + // mostly one pattern, and getMachineReassociationPatterns() can order + // patterns based on an internal cost heuristic. 
+ + if (getMachineReassociationPatterns(MI, Patterns)) { + for (auto P : Patterns) { + SmallVector InsInstrs; + SmallVector DelInstrs; + DenseMap InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!NewInstCount) + continue; + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, + NewInstCount < OldInstCount) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstReassociated; + + Traces->invalidate(MBB); + Traces->verifyAnalysis(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. 
+ MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); + } + InstrIdxForVirtReg.clear(); + } + } + } + + return Changed; +} + +bool MachineReassociation::runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); + TSchedModel.init(SchedModel, &STI, TII); + MRI = &MF.getRegInfo(); + Traces = &getAnalysis(); + MinInstr = 0; + OptSize = MF.getFunction()->optForSize(); + + DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + if (!TII->useMachineReassociation()) { + DEBUG( + dbgs() + << " Skipping pass: Target does not support machine reassociation\n"); + return false; + } + + bool Changed = false; + + // Try to reassociate instructions. + for (auto &MBB : MF) + Changed |= reassociateInstructions(&MBB); + + return Changed; +} Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -63,19 +63,6 @@ }; } // end namespace PPCII -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - // These are commutative variants for reassociating a computation chain - // of the form: - // B = A op X (Prev) - // C = B op Y (Root) - MC_REASSOC_AX_BY = 0, - MC_REASSOC_AX_YB = 1, - MC_REASSOC_XA_BY = 2, - MC_REASSOC_XA_YB = 3, -}; -} // end namespace MachineCombinerPattern - class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; @@ -132,24 +119,21 @@ return false; } - bool useMachineCombiner() const override { - return true; - } - - /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// output in the array. 
- bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &P) const override; - - /// When getMachineCombinerPatterns() finds a pattern, this function generates - /// the instructions that could replace the original code sequence. - void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const override; + bool useMachineReassociation() const override; + + /// Return true when Inst has reassociable operands in the same MBB + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + + /// Return true when Inst is both associative and commutative + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + + void reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const override; bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -189,8 +189,9 @@ return Latency; } -static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { +// static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, +bool PPCInstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { const MachineOperand &Op1 = Inst.getOperand(1); const MachineOperand &Op2 = Inst.getOperand(2); const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); @@ -210,33 +211,8 @@ return false; } -static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo 
&MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - if (MI1->getOpcode() == AssocOpcode && - hasVirtualRegDefsInBasicBlock(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; -} - // This function does not list all associative and commutative operations, but -// only those worth feeding through the machine combiner in an attempt to +// only those worth feeding through the machine reassociation in an attempt to // reduce the critical path. Mostly, this means floating-point operations, // because they have high latencies (compared to other operations, such and // and/or, which are also associative and commutative, but have low latencies). @@ -254,8 +230,9 @@ // // breaking the dependency between A and B, allowing them to be executed in // parallel (or back-to-back in a pipeline) instead of depending on each other. 
-static bool isAssociativeAndCommutative(unsigned Opcode) { - switch (Opcode) { +//static bool isAssociativeAndCommutative(unsigned Opcode) { +bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const{ + switch (Inst.getOpcode()) { // FP Add: case PPC::FADD: case PPC::FADDS: @@ -288,60 +265,11 @@ } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst.getOpcode()) && - hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && - hasReassocSibling(Inst, Commuted)) - return true; - - return false; -} - -bool PPCInstrInfo::getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns) const { - // Using the machine combiner in this way is potentially expensive, so - // restrict to when aggressive optimizations are desired. +bool PPCInstrInfo::useMachineReassociation() const { if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) return false; - // FP reassociation is only legal when we don't need strict IEEE semantics. - if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) - return false; - - // Look for this reassociation pattern: - // B = A op X (Prev) - // C = B op Y (Root) - - // FIXME: We should also match FMA operations here, where we consider the - // 'part' of the FMA, either the addition or the multiplication, paired with - // an actual addition or multiplication. 
- - bool Commute; - if (isReassocCandidate(Root, Commute)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. Specify each commutation - // possibility for the Prev instruction in the sequence and let the - // machine combiner decide if changing the operands is worthwhile. - if (Commute) { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); - } else { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); - } - return true; - } - - return false; + return true; } /// Attempt the following reassociation to reduce critical path length: @@ -350,11 +278,11 @@ /// ===> /// B = X op Y /// C = A op B -static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) { +void PPCInstrInfo::reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -395,8 +323,9 @@ MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of - // recycling RegB because the MachineCombiner's computation of the critical - // path requires a new register definition rather than an existing one. + // recycling RegB because the MachineReassociation's computation of the + // critical path requires a new register definition rather than an existing + // one. 
unsigned NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); @@ -423,31 +352,6 @@ DelInstrs.push_back(&Root); } -void PPCInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); - - // Select the previous instruction in the sequence based on the input pattern. - MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::MC_REASSOC_AX_BY: - case MachineCombinerPattern::MC_REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::MC_REASSOC_AX_YB: - case MachineCombinerPattern::MC_REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - } - assert(Prev && "Unknown pattern for machine combiner"); - - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; -} - // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. 
bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, Index: lib/Target/PowerPC/PPCTargetMachine.cpp =================================================================== --- lib/Target/PowerPC/PPCTargetMachine.cpp +++ lib/Target/PowerPC/PPCTargetMachine.cpp @@ -58,8 +58,8 @@ cl::init(true), cl::Hidden); static cl::opt -EnableMachineCombinerPass("ppc-machine-combiner", - cl::desc("Enable the machine combiner pass"), +EnableMachineReassociationPass("ppc-machine-reassociation", + cl::desc("Enable the machine reassociation pass"), cl::init(true), cl::Hidden); extern "C" void LLVMInitializePowerPCTarget() { @@ -322,8 +322,8 @@ bool PPCPassConfig::addILPOpts() { addPass(&EarlyIfConverterID); - if (EnableMachineCombinerPass) - addPass(&MachineCombinerID); + if (EnableMachineReassociationPass) + addPass(&MachineReassociationID); return true; } Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -26,19 +26,6 @@ class X86RegisterInfo; class X86Subtarget; - namespace MachineCombinerPattern { - enum MC_PATTERN : int { - // These are commutative variants for reassociating a computation chain - // of the form: - // B = A op X (Prev) - // C = B op Y (Root) - MC_REASSOC_AX_BY = 0, - MC_REASSOC_AX_YB = 1, - MC_REASSOC_XA_BY = 2, - MC_REASSOC_XA_YB = 3, - }; - } // end namespace MachineCombinerPattern - namespace X86 { // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. @@ -453,24 +440,23 @@ unsigned UseIdx) const override; - bool useMachineCombiner() const override { + bool useMachineReassociation() const override { return true; } - - /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// output in the array.
- bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &P) const override; - - /// When getMachineCombinerPatterns() finds a pattern, this function generates - /// the instructions that could replace the original code sequence. - void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const override; + + /// Return true when Inst has reassociable operands in the same MBB + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + + /// Return true when Inst is both associative and commutative + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + + void reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -6324,8 +6324,8 @@ return isHighLatencyDef(DefMI->getOpcode()); } -static bool hasReassociableOperands(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { +bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) && "Reassociation needs binary operators"); const MachineOperand &Op1 = Inst.getOperand(1); @@ -6362,36 +6362,11 @@ return false; } -static bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = 
MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - if (MI1->getOpcode() == AssocOpcode && - hasReassociableOperands(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; -} - // TODO: There are many more machine instruction opcodes to match: // 1. Other data types (integer, vectors) // 2. Other math / logic operations (xor, or) // 3. Other forms of the same operation (intrinsics and other variants) -static bool isAssociativeAndCommutative(const MachineInstr &Inst) { +bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const{ switch (Inst.getOpcode()) { case X86::AND8rr: case X86::AND16rr: @@ -6464,59 +6439,6 @@ } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. 
- if (isAssociativeAndCommutative(Inst) && - hasReassociableOperands(Inst, Inst.getParent()) && - hasReassociableSibling(Inst, Commuted)) - return true; - - return false; -} - -// FIXME: This has the potential to be expensive (compile time) while not -// improving the code at all. Some ways to limit the overhead: -// 1. Track successful transforms; bail out if hit rate gets too low. -// 2. Only enable at -O3 or some other non-default optimization level. -// 3. Pre-screen pattern candidates here: if an operand of the previous -// instruction is known to not increase the critical path, then don't match -// that pattern. -bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns) const { - // TODO: There is nothing x86-specific here except the instruction type. - // This logic could be hoisted into the machine combiner pass itself. - - // Look for this reassociation pattern: - // B = A op X (Prev) - // C = B op Y (Root) - - bool Commute; - if (isReassociationCandidate(Root, Commute)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. Specify each commutation - // possibility for the Prev instruction in the sequence and let the - // machine combiner decide if changing the operands is worthwhile. - if (Commute) { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); - } else { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); - } - return true; - } - - return false; -} - /// This is an architecture-specific helper function of reassociateOps. /// Set special operand attributes for new instructions after reassociation. 
static void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, @@ -6528,7 +6450,7 @@ assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 && "Unexpected instruction type for reassociation"); - + MachineOperand &OldOp1 = OldMI1.getOperand(3); MachineOperand &OldOp2 = OldMI2.getOperand(3); MachineOperand &NewOp1 = NewMI1.getOperand(3); @@ -6561,11 +6483,12 @@ /// ===> /// B = X op Y /// C = A op B -static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) { +void X86InstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineReassociationPattern::MR_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -6606,7 +6529,7 @@ MRI.constrainRegClass(RegC, RC); // Create a new virtual register for the result of (X op Y) instead of - // recycling RegB because the MachineCombiner's computation of the critical + // recycling RegB because the MachineReassociation's computation of the critical // path requires a new register definition rather than an existing one. unsigned NewVR = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); @@ -6635,31 +6558,6 @@ DelInstrs.push_back(&Root); } -void X86InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); - - // Select the previous instruction in the sequence based on the input pattern. 
- MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::MC_REASSOC_AX_BY: - case MachineCombinerPattern::MC_REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::MC_REASSOC_AX_YB: - case MachineCombinerPattern::MC_REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - } - assert(Prev && "Unknown pattern for machine combiner"); - - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; -} - std::pair X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { return std::make_pair(TF, 0u); Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -24,8 +24,8 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -static cl::opt EnableMachineCombinerPass("x86-machine-combiner", - cl::desc("Enable the machine combiner pass"), +static cl::opt EnableMachineReassociationPass("x86-machine-reassociation", + cl::desc("Enable the machine reassociation pass"), cl::init(true), cl::Hidden); namespace llvm { @@ -239,8 +239,8 @@ bool X86PassConfig::addILPOpts() { addPass(&EarlyIfConverterID); - if (EnableMachineCombinerPass) - addPass(&MachineCombinerID); + if (EnableMachineReassociationPass) + addPass(&MachineReassociationID); return true; } Index: test/CodeGen/X86/machine-combiner-int.ll =================================================================== --- test/CodeGen/X86/machine-combiner-int.ll +++ test/CodeGen/X86/machine-combiner-int.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after machine-combiner -o /dev/null 2>&1 | FileCheck %s --check-prefix=DEAD +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -stop-after machine-reassociate -o /dev/null 
2>&1 | FileCheck %s --check-prefix=DEAD ; Verify that integer multiplies are reassociated. The first multiply in ; each test should be independent of the result of the preceding add (lea).