Index: include/llvm/CodeGen/MachineCombinerPattern.h =================================================================== --- include/llvm/CodeGen/MachineCombinerPattern.h +++ include/llvm/CodeGen/MachineCombinerPattern.h @@ -22,7 +22,28 @@ /// namespace MachineCombinerPattern { // Forward declaration -enum MC_PATTERN : int; +enum MC_PATTERN : int { + // specific for reassociation + MC_REASSOC_AX_BY = 0, + MC_REASSOC_AX_YB = 1, + MC_REASSOC_XA_BY = 2, + MC_REASSOC_XA_YB = 3, + + // AArch64 specific for combining mul & add/sub + MC_NONE = 4, + MC_MULADDW_OP1 = 5, + MC_MULADDW_OP2 = 6, + MC_MULSUBW_OP1 = 7, + MC_MULSUBW_OP2 = 8, + MC_MULADDWI_OP1 = 9, + MC_MULSUBWI_OP1 = 10, + MC_MULADDX_OP1 = 11, + MC_MULADDX_OP2 = 12, + MC_MULSUBX_OP1 = 13, + MC_MULSUBX_OP2 = 14, + MC_MULADDXI_OP1 = 15, + MC_MULSUBXI_OP1 = 16, +}; } // end namespace MachineCombinerPattern } // end namespace llvm Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -718,6 +718,9 @@ ArrayRef Ops, MachineInstr *LoadMI) const; + /// Return true when a target supports MachineCombiner. + virtual bool useMachineCombiner() const { return false; } + /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in \p Root. All potential patterns are /// returned in the \p Pattern vector. Pattern should be sorted in priority @@ -749,8 +752,31 @@ return; } - /// Return true when a target supports MachineCombiner. - virtual bool useMachineCombiner() const { return false; } + /// Return true when a target supports MachineReassociation. + virtual bool useMachineReassociation(bool UnsafeFPMath) const { + return false; + } + + /// Return true when \P Inst is both associative and commutative + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { + return false; + } + + /// Return true when \P Inst has reassociable operands in the same \P MBB + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { + return false; + } + + /// Attempt to reassociate the instructions to reduce critical path length. + virtual void + reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { + return; + } protected: /// Target-dependent implementation for foldMemoryOperand. Index: lib/CodeGen/MachineCombiner.cpp =================================================================== --- lib/CodeGen/MachineCombiner.cpp +++ lib/CodeGen/MachineCombiner.cpp @@ -59,7 +59,7 @@ private: bool doSubstitute(unsigned NewSize, unsigned OldSize); - bool combineInstructions(MachineBasicBlock *); + bool combineInstructions(MachineBasicBlock *, bool isReassociation); MachineInstr *getOperandDef(const MachineOperand &MO); unsigned getDepth(SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, @@ -78,6 +78,21 @@ SmallVectorImpl &DelInstrs); void instr2instrSC(SmallVectorImpl &Instrs, SmallVectorImpl &InstrsSC); + + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted); + + bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted); + + bool getMachineReassociationPatterns( + MachineInstr &Root, + SmallVectorImpl &Patterns); + + void + genAlterReassocCodeSequence(MachineInstr &Root, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg); }; } @@ -320,6 +335,119 @@ return false; } +bool MachineCombiner::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. + if (MI1->getOpcode() == AssocOpcode && + TII->hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) + return true; + + return false; +} + +/// Return true if the input instruction is part of a chain of dependent ops +/// that are suitable for reassociation, otherwise return false. +/// If the instruction's operands must be commuted to have a previous +/// instruction of the same type define the first source operand, Commuted will +/// be set to true. +bool MachineCombiner::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) { + // 1. The operation must be associative and commutative. + // 2. The instruction must have virtual register definitions for its + // operands in the same basic block. + // 3. The instruction must have a reassociable sibling. + if (TII->isAssociativeAndCommutative(Inst) && + TII->hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted)) + return true; + + return false; +} + +/// Return true when there is potentially a faster code sequence +/// for an instruction chain ending in . All potential patterns are +/// output in the array. + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. +bool MachineCombiner::getMachineReassociationPatterns( + MachineInstr &Root, + SmallVectorImpl &Patterns) { + // Look for this reassociation pattern: + // B = A op X (Prev) + // C = B op Y (Root) + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// When getMachineCombinerPatterns() finds a pattern, this function generates +/// the instructions that could replace the original code sequence. +void MachineCombiner::genAlterReassocCodeSequence( + MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) { + + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. + MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineCombinerPattern::MC_REASSOC_AX_BY: + case MachineCombinerPattern::MC_REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::MC_REASSOC_AX_YB: + case MachineCombinerPattern::MC_REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + break; + default: + break; + } + assert(Prev && "Unknown pattern for machine reassociation"); + + TII->reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + + return; +} + /// Substitute a slow code sequence with a faster one by /// evaluating instruction combining pattern. /// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction @@ -327,7 +455,8 @@ /// instructions when this neither lengthens the critical path nor increases /// resource pressure. When optimizing for codesize always combine when the new /// sequence is shorter. -bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { +bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB, + bool isReassociation) { bool Changed = false; DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); @@ -363,54 +492,69 @@ // mostly one pattern, and getMachineCombinerPatterns() can order patterns // based on an internal cost heuristic. - if (TII->getMachineCombinerPatterns(MI, Patterns)) { - for (auto P : Patterns) { - SmallVector InsInstrs; - SmallVector DelInstrs; - DenseMap InstrIdxForVirtReg; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); - Traces->verifyAnalysis(); + bool hasPatterns = false; + + if (isReassociation) + hasPatterns = getMachineReassociationPatterns(MI, Patterns); + else + hasPatterns = TII->getMachineCombinerPatterns(MI, Patterns); + + if (!hasPatterns) + continue; + + for (auto P : Patterns) { + SmallVector InsInstrs; + SmallVector DelInstrs; + DenseMap InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + + if (isReassociation) { + genAlterReassocCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + } else { TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, InstrIdxForVirtReg); - unsigned NewInstCount = InsInstrs.size(); - unsigned OldInstCount = DelInstrs.size(); - // Found pattern, but did not generate alternative sequence. - // This can happen e.g. when an immediate could not be materialized - // in a single instruction. - if (!NewInstCount) - continue; - // Substitute when we optimize for codesize and the new sequence has - // fewer instructions OR - // the new sequence neither lengthens the critical path nor increases - // resource pressure. - if (doSubstitute(NewInstCount, OldInstCount) || - (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg, - NewInstCount < OldInstCount) && - preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { - for (auto *InstrPtr : InsInstrs) - MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); - for (auto *InstrPtr : DelInstrs) - InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); - - Changed = true; - ++NumInstCombined; - - Traces->invalidate(MBB); - Traces->verifyAnalysis(); - // Eagerly stop after the first pattern fires. - break; - } else { - // Cleanup instructions of the alternative code sequence. There is no - // use for them. - MachineFunction *MF = MBB->getParent(); - for (auto *InstrPtr : InsInstrs) - MF->DeleteMachineInstr(InstrPtr); - } - InstrIdxForVirtReg.clear(); } + + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!NewInstCount) + continue; + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, + NewInstCount < OldInstCount) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); + Traces->verifyAnalysis(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. + MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); + } + InstrIdxForVirtReg.clear(); } } @@ -429,16 +573,26 @@ OptSize = MF.getFunction()->optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); - if (!TII->useMachineCombiner()) { - DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); + + bool UnsafeFPMath = MF.getTarget().Options.UnsafeFPMath; + + if (!TII->useMachineCombiner() && + !TII->useMachineReassociation(UnsafeFPMath)) { + DEBUG(dbgs() + << " Skipping pass: Target does not support machine combiner\n"); return false; } bool Changed = false; // Try to combine instructions. - for (auto &MBB : MF) - Changed |= combineInstructions(&MBB); + for (auto &MBB : MF) { + if (TII->useMachineCombiner()) + Changed |= combineInstructions(&MBB, false); + + if (TII->useMachineReassociation(UnsafeFPMath)) + Changed |= combineInstructions(&MBB, true); + } return Changed; } Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" -#include "AArch64MachineCombinerPattern.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" Index: lib/Target/AArch64/AArch64MachineCombinerPattern.h =================================================================== --- lib/Target/AArch64/AArch64MachineCombinerPattern.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- AArch64MachineCombinerPattern.h -===// -//===- AArch64 instruction pattern supported by combiner -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines instruction pattern supported by combiner -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H -#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H - -namespace llvm { - -/// Enumeration of instruction pattern supported by machine combiner -/// -/// -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - MC_NONE = 0, - MC_MULADDW_OP1 = 1, - MC_MULADDW_OP2 = 2, - MC_MULSUBW_OP1 = 3, - MC_MULSUBW_OP2 = 4, - MC_MULADDWI_OP1 = 5, - MC_MULSUBWI_OP1 = 6, - MC_MULADDX_OP1 = 7, - MC_MULADDX_OP2 = 8, - MC_MULSUBX_OP1 = 9, - MC_MULSUBX_OP2 = 10, - MC_MULADDXI_OP1 = 11, - MC_MULSUBXI_OP1 = 12 -}; -} // end namespace MachineCombinerPattern -} // end namespace llvm - -#endif Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -63,19 +63,6 @@ }; } // end namespace PPCII -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - // These are commutative variants for reassociating a computation chain - // of the form: - // B = A op X (Prev) - // C = B op Y (Root) - MC_REASSOC_AX_BY = 0, - MC_REASSOC_AX_YB = 1, - MC_REASSOC_XA_BY = 2, - MC_REASSOC_XA_YB = 3, -}; -} // end namespace MachineCombinerPattern - class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; @@ -132,24 +119,23 @@ return false; } - bool useMachineCombiner() const override { - return true; - } - - /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// output in the array. - bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &P) const override; - - /// When getMachineCombinerPatterns() finds a pattern, this function generates - /// the instructions that could replace the original code sequence. - void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const override; + bool useMachineCombiner() const override { return false; } + + bool useMachineReassociation(bool UnsafeFPMath) const override; + + /// Return true when Inst is both associative and commutative + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + + /// Return true when Inst has reassociable operands in the same MBB + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + + void reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const override; bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -189,8 +189,18 @@ return Latency; } -static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { +bool PPCInstrInfo::useMachineReassociation(bool UnsafeFPMath) const { + // Using the machine combiner in this way is potentially expensive, so + // restrict to when aggressive optimizations are desired. + if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) + return false; + + // FP reassociation is only legal when we don't need strict IEEE semantics. + return UnsafeFPMath; +} + +bool PPCInstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { const MachineOperand &Op1 = Inst.getOperand(1); const MachineOperand &Op2 = Inst.getOperand(2); const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); @@ -210,31 +220,6 @@ return false; } -static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - if (MI1->getOpcode() == AssocOpcode && - hasVirtualRegDefsInBasicBlock(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; -} - // This function does not list all associative and commutative operations, but // only those worth feeding through the machine combiner in an attempt to // reduce the critical path. Mostly, this means floating-point operations, @@ -254,8 +239,8 @@ // // breaking the dependency between A and B, allowing them to be executed in // parallel (or back-to-back in a pipeline) instead of depending on each other. -static bool isAssociativeAndCommutative(unsigned Opcode) { - switch (Opcode) { +bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { // FP Add: case PPC::FADD: case PPC::FADDS: @@ -288,73 +273,18 @@ } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst.getOpcode()) && - hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && - hasReassocSibling(Inst, Commuted)) - return true; - - return false; -} - -bool PPCInstrInfo::getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns) const { - // Using the machine combiner in this way is potentially expensive, so - // restrict to when aggressive optimizations are desired. - if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) - return false; - - // FP reassociation is only legal when we don't need strict IEEE semantics. - if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) - return false; - - // Look for this reassociation pattern: - // B = A op X (Prev) - // C = B op Y (Root) - - // FIXME: We should also match FMA operations here, where we consider the - // 'part' of the FMA, either the addition or the multiplication, paired with - // an actual addition or multiplication. - - bool Commute; - if (isReassocCandidate(Root, Commute)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. Specify each commutation - // possibility for the Prev instruction in the sequence and let the - // machine combiner decide if changing the operands is worthwhile. - if (Commute) { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); - } else { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); - } - return true; - } - - return false; -} - /// Attempt the following reassociation to reduce critical path length: /// B = A op X (Prev) /// C = B op Y (Root) /// ===> /// B = X op Y /// C = A op B -static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) { +void PPCInstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -365,10 +295,10 @@ // operands may be commuted. Each row corresponds to a pattern value, // and each column specifies the index of A, B, X, Y. unsigned OpIdx[4][4] = { - { 1, 1, 2, 2 }, - { 1, 2, 2, 1 }, - { 2, 1, 1, 2 }, - { 2, 2, 1, 1 } + {1, 1, 2, 2}, + {1, 2, 2, 1}, + {2, 1, 1, 2}, + {2, 2, 1, 1} }; MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]); @@ -407,15 +337,15 @@ // Create new instructions for insertion. MachineInstrBuilder MIB1 = - BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) - .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); InsInstrs.push_back(MIB1); MachineInstrBuilder MIB2 = - BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) - .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); InsInstrs.push_back(MIB2); // Record old instructions for deletion. @@ -423,31 +353,6 @@ DelInstrs.push_back(&Root); } -void PPCInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); - - // Select the previous instruction in the sequence based on the input pattern. - MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::MC_REASSOC_AX_BY: - case MachineCombinerPattern::MC_REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::MC_REASSOC_AX_YB: - case MachineCombinerPattern::MC_REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - } - assert(Prev && "Unknown pattern for machine combiner"); - - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; -} - // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -26,19 +26,6 @@ class X86RegisterInfo; class X86Subtarget; - namespace MachineCombinerPattern { - enum MC_PATTERN : int { - // These are commutative variants for reassociating a computation chain - // of the form: - // B = A op X (Prev) - // C = B op Y (Root) - MC_REASSOC_AX_BY = 0, - MC_REASSOC_AX_YB = 1, - MC_REASSOC_XA_BY = 2, - MC_REASSOC_XA_YB = 3, - }; - } // end namespace MachineCombinerPattern - namespace X86 { // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. @@ -452,25 +439,25 @@ const MachineInstr *UseMI, unsigned UseIdx) const override; - - bool useMachineCombiner() const override { + bool useMachineCombiner() const override { return false; } + + bool useMachineReassociation(bool UnsafeFPMath) const override { return true; } - - /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// output in the array. - bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &P) const override; - - /// When getMachineCombinerPatterns() finds a pattern, this function generates - /// the instructions that could replace the original code sequence. - void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const override; + + /// Return true when Inst is both associative and commutative + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + + /// Return true when Inst has reassociable operands in the same MBB + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + + void reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -6324,8 +6324,8 @@ return isHighLatencyDef(DefMI->getOpcode()); } -static bool hasReassociableOperands(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { +bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) && "Reassociation needs binary operators"); const MachineOperand &Op1 = Inst.getOperand(1); @@ -6345,7 +6345,7 @@ if (!Inst.getOperand(3).isDead()) return false; } - + // We need virtual register definitions for the operands that we will // reassociate. MachineInstr *MI1 = nullptr; @@ -6362,36 +6362,11 @@ return false; } -static bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - if (MI1->getOpcode() == AssocOpcode && - hasReassociableOperands(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; -} - // TODO: There are many more machine instruction opcodes to match: // 1. Other data types (integer, vectors) // 2. Other math / logic operations (xor, or) // 3. Other forms of the same operation (intrinsics and other variants) -static bool isAssociativeAndCommutative(const MachineInstr &Inst) { +bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { switch (Inst.getOpcode()) { case X86::AND8rr: case X86::AND16rr: @@ -6464,59 +6439,6 @@ } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst) && - hasReassociableOperands(Inst, Inst.getParent()) && - hasReassociableSibling(Inst, Commuted)) - return true; - - return false; -} - -// FIXME: This has the potential to be expensive (compile time) while not -// improving the code at all. Some ways to limit the overhead: -// 1. Track successful transforms; bail out if hit rate gets too low. -// 2. Only enable at -O3 or some other non-default optimization level. -// 3. Pre-screen pattern candidates here: if an operand of the previous -// instruction is known to not increase the critical path, then don't match -// that pattern. -bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns) const { - // TODO: There is nothing x86-specific here except the instruction type. - // This logic could be hoisted into the machine combiner pass itself. - - // Look for this reassociation pattern: - // B = A op X (Prev) - // C = B op Y (Root) - - bool Commute; - if (isReassociationCandidate(Root, Commute)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. Specify each commutation - // possibility for the Prev instruction in the sequence and let the - // machine combiner decide if changing the operands is worthwhile. - if (Commute) { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); - } else { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); - } - return true; - } - - return false; -} - /// This is an architecture-specific helper function of reassociateOps. /// Set special operand attributes for new instructions after reassociation. static void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, @@ -6528,7 +6450,7 @@ assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 && "Unexpected instruction type for reassociation"); - + MachineOperand &OldOp1 = OldMI1.getOperand(3); MachineOperand &OldOp2 = OldMI2.getOperand(3); MachineOperand &NewOp1 = NewMI1.getOperand(3); @@ -6561,11 +6483,12 @@ /// ===> /// B = X op Y /// C = A op B -static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) { +void X86InstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getParent()->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -6576,10 +6499,10 @@ // operands may be commuted. Each row corresponds to a pattern value, // and each column specifies the index of A, B, X, Y. unsigned OpIdx[4][4] = { - { 1, 1, 2, 2 }, - { 1, 2, 2, 1 }, - { 2, 1, 1, 2 }, - { 2, 2, 1, 1 } + {1, 1, 2, 2}, + {1, 2, 2, 1}, + {2, 1, 1, 2}, + {2, 2, 1, 1} }; MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]); @@ -6618,13 +6541,13 @@ // Create new instructions for insertion. MachineInstrBuilder MIB1 = - BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) - .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); MachineInstrBuilder MIB2 = - BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) - .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); @@ -6635,31 +6558,6 @@ DelInstrs.push_back(&Root); } -void X86InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); - - // Select the previous instruction in the sequence based on the input pattern. - MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::MC_REASSOC_AX_BY: - case MachineCombinerPattern::MC_REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::MC_REASSOC_AX_YB: - case MachineCombinerPattern::MC_REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - } - assert(Prev && "Unknown pattern for machine combiner"); - - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; -} - std::pair X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { return std::make_pair(TF, 0u);