Index: include/llvm/CodeGen/MachineCombinerPattern.h =================================================================== --- include/llvm/CodeGen/MachineCombinerPattern.h +++ include/llvm/CodeGen/MachineCombinerPattern.h @@ -22,7 +22,31 @@ /// namespace MachineCombinerPattern { // Forward declaration -enum MC_PATTERN : int; +enum MC_PATTERN : int { + // These are commutative variants for reassociating a computation chain + // of the form: + // B = A op X (Prev) + // C = B op Y (Root) + MC_REASSOC_AX_BY = 0, + MC_REASSOC_AX_YB = 1, + MC_REASSOC_XA_BY = 2, + MC_REASSOC_XA_YB = 3, + + /// Enumeration of instruction pattern supported by machine combiner + MC_NONE = 4, + MC_MULADDW_OP1 = 5, + MC_MULADDW_OP2 = 6, + MC_MULSUBW_OP1 = 7, + MC_MULSUBW_OP2 = 8, + MC_MULADDWI_OP1 = 9, + MC_MULSUBWI_OP1 = 10, + MC_MULADDX_OP1 = 11, + MC_MULADDX_OP2 = 12, + MC_MULSUBX_OP1 = 13, + MC_MULSUBX_OP2 = 14, + MC_MULADDXI_OP1 = 15, + MC_MULSUBXI_OP1 = 16 +}; } // end namespace MachineCombinerPattern } // end namespace llvm Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -727,10 +727,28 @@ /// \param Pattern - Vector of possible combination patterns virtual bool getMachineCombinerPatterns( MachineInstr &Root, - SmallVectorImpl &Pattern) const { + SmallVectorImpl &Patterns) const; + + /// Return true if the input \P Inst is part of a chain of dependent ops + /// that are suitable for reassociation, otherwise return false. + /// If the instruction's operands must be commuted to have a previous + /// instruction of the same type define the first source operand, \P Commuted + /// will + /// be set to true. + bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const; + + /// Return true when \P Inst is both associative and commutative. + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { return false; } + /// Return true when \P Inst has reassociable operands in the same \P MBB. + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const; + + /// Return true when \P Inst has reassociable sibling. + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const; + /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence. The client /// has to decide whether the actual replacement is beneficial or not. @@ -745,9 +763,23 @@ MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const { + DenseMap &InstrIdxForVirtReg) const; + + /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to + /// reduce critical path length. + void reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const; + + /// This is an architecture-specific helper function of reassociateOps. + /// Set special operand attributes for new instructions after reassociation. + virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const { return; - } + }; /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -511,6 +511,205 @@ return --Pos; } +bool TargetInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + const MachineOperand &Op1 = Inst.getOperand(1); + const MachineOperand &Op2 = Inst.getOperand(2); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And they need to be in the trace (otherwise, they won't have a depth). + if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB) + return true; + + return false; +} + +bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. + if (MI1->getOpcode() == AssocOpcode && hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) + return true; + + return false; +} + +bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) const { + // 1. The operation must be associative and commutative. + // 2. The instruction must have virtual register definitions for its + // operands in the same basic block. + // 3. The instruction must have a reassociable sibling. + if (isAssociativeAndCommutative(Inst) && + hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted)) + return true; + + return false; +} + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. +bool TargetInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl &Patterns) const { + // Look for this reassociation pattern: + // B = A op X (Prev) + // C = B op Y (Root) + + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// Attempt the following reassociation to reduce critical path length: +/// B = A op X (Prev) +/// C = B op Y (Root) +/// ===> +/// B = X op Y +/// C = A op B +void TargetInstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const { + MachineFunction *MF = Root.getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + + // This array encodes the operand index for each parameter because the + // operands may be commuted. Each row corresponds to a pattern value, + // and each column specifies the index of A, B, X, Y. + unsigned OpIdx[4][4] = { + {1, 1, 2, 2}, {1, 2, 2, 1}, {2, 1, 1, 2}, {2, 2, 1, 1}}; + + MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]); + MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]); + MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); + MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); + MachineOperand &OpC = Root.getOperand(0); + + unsigned RegA = OpA.getReg(); + unsigned RegB = OpB.getReg(); + unsigned RegX = OpX.getReg(); + unsigned RegY = OpY.getReg(); + unsigned RegC = OpC.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) + MRI.constrainRegClass(RegA, RC); + if (TargetRegisterInfo::isVirtualRegister(RegB)) + MRI.constrainRegClass(RegB, RC); + if (TargetRegisterInfo::isVirtualRegister(RegX)) + MRI.constrainRegClass(RegX, RC); + if (TargetRegisterInfo::isVirtualRegister(RegY)) + MRI.constrainRegClass(RegY, RC); + if (TargetRegisterInfo::isVirtualRegister(RegC)) + MRI.constrainRegClass(RegC, RC); + + // Create a new virtual register for the result of (X op Y) instead of + // recycling RegB because the MachineCombiner's computation of the critical + // path requires a new register definition rather than an existing one. + unsigned NewVR = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + + unsigned Opcode = Root.getOpcode(); + bool KillA = OpA.isKill(); + bool KillX = OpX.isKill(); + bool KillY = OpY.isKill(); + + // Create new instructions for insertion. + MachineInstrBuilder MIB1 = + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); + MachineInstrBuilder MIB2 = + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); + + setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); + + // Record new instructions for insertion and old instructions for deletion. + InsInstrs.push_back(MIB1); + InsInstrs.push_back(MIB2); + DelInstrs.push_back(&Prev); + DelInstrs.push_back(&Root); +} + +void TargetInstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstIdxForVirtReg) const { + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. + MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineCombinerPattern::MC_REASSOC_AX_BY: + case MachineCombinerPattern::MC_REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::MC_REASSOC_AX_YB: + case MachineCombinerPattern::MC_REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + break; + default: + break; + } + + assert(Prev && "Unknown pattern for machine combiner"); + + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); + return; +} + /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" -#include "AArch64MachineCombinerPattern.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" Index: lib/Target/AArch64/AArch64MachineCombinerPattern.h =================================================================== --- lib/Target/AArch64/AArch64MachineCombinerPattern.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- AArch64MachineCombinerPattern.h -===// -//===- AArch64 instruction pattern supported by combiner -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines instruction pattern supported by combiner -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H -#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H - -namespace llvm { - -/// Enumeration of instruction pattern supported by machine combiner -/// -/// -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - MC_NONE = 0, - MC_MULADDW_OP1 = 1, - MC_MULADDW_OP2 = 2, - MC_MULSUBW_OP1 = 3, - MC_MULSUBW_OP2 = 4, - MC_MULADDWI_OP1 = 5, - MC_MULSUBWI_OP1 = 6, - MC_MULADDX_OP1 = 7, - MC_MULADDX_OP2 = 8, - MC_MULSUBX_OP1 = 9, - MC_MULSUBX_OP2 = 10, - MC_MULADDXI_OP1 = 11, - MC_MULSUBXI_OP1 = 12 -}; -} // end namespace MachineCombinerPattern -} // end namespace llvm - -#endif Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -63,19 +63,6 @@ }; } // end namespace PPCII -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - // These are commutative variants for reassociating a computation chain - // of the form: - // B = A op X (Prev) - // C = B op Y (Root) - MC_REASSOC_AX_BY = 0, - MC_REASSOC_AX_YB = 1, - MC_REASSOC_XA_BY = 2, - MC_REASSOC_XA_YB = 3, -}; -} // end namespace MachineCombinerPattern - class PPCSubtarget; class PPCInstrInfo : public PPCGenInstrInfo { PPCSubtarget &Subtarget; @@ -135,21 +122,15 @@ bool useMachineCombiner() const override { return true; } - + /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in . All potential patterns are /// output in the array. bool getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl &P) const override; - - /// When getMachineCombinerPatterns() finds a pattern, this function generates - /// the instructions that could replace the original code sequence. - void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const override; + + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -189,52 +189,6 @@ return Latency; } -static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { - const MachineOperand &Op1 = Inst.getOperand(1); - const MachineOperand &Op2 = Inst.getOperand(2); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - - // We need virtual register definitions. - MachineInstr *MI1 = nullptr; - MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) - MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) - MI2 = MRI.getUniqueVRegDef(Op2.getReg()); - - // And they need to be in the trace (otherwise, they won't have a depth). - if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB) - return true; - - return false; -} - -static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - if (MI1->getOpcode() == AssocOpcode && - hasVirtualRegDefsInBasicBlock(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; -} - // This function does not list all associative and commutative operations, but // only those worth feeding through the machine combiner in an attempt to // reduce the critical path. Mostly, this means floating-point operations, @@ -254,8 +208,8 @@ // // breaking the dependency between A and B, allowing them to be executed in // parallel (or back-to-back in a pipeline) instead of depending on each other. -static bool isAssociativeAndCommutative(unsigned Opcode) { - switch (Opcode) { +bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { // FP Add: case PPC::FADD: case PPC::FADDS: @@ -288,26 +242,9 @@ } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst.getOpcode()) && - hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && - hasReassocSibling(Inst, Commuted)) - return true; - - return false; -} - -bool PPCInstrInfo::getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns) const { +bool PPCInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl &Patterns) const { // Using the machine combiner in this way is potentially expensive, so // restrict to when aggressive optimizations are desired. if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) @@ -317,135 +254,7 @@ if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) return false; - // Look for this reassociation pattern: - // B = A op X (Prev) - // C = B op Y (Root) - - // FIXME: We should also match FMA operations here, where we consider the - // 'part' of the FMA, either the addition or the multiplication, paired with - // an actual addition or multiplication. - - bool Commute; - if (isReassocCandidate(Root, Commute)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. Specify each commutation - // possibility for the Prev instruction in the sequence and let the - // machine combiner decide if changing the operands is worthwhile. - if (Commute) { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); - } else { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); - } - return true; - } - - return false; -} - -/// Attempt the following reassociation to reduce critical path length: -/// B = A op X (Prev) -/// C = B op Y (Root) -/// ===> -/// B = X op Y -/// C = A op B -static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) { - MachineFunction *MF = Root.getParent()->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); - - // This array encodes the operand index for each parameter because the - // operands may be commuted. Each row corresponds to a pattern value, - // and each column specifies the index of A, B, X, Y. - unsigned OpIdx[4][4] = { - { 1, 1, 2, 2 }, - { 1, 2, 2, 1 }, - { 2, 1, 1, 2 }, - { 2, 2, 1, 1 } - }; - - MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]); - MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]); - MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); - MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); - MachineOperand &OpC = Root.getOperand(0); - - unsigned RegA = OpA.getReg(); - unsigned RegB = OpB.getReg(); - unsigned RegX = OpX.getReg(); - unsigned RegY = OpY.getReg(); - unsigned RegC = OpC.getReg(); - - if (TargetRegisterInfo::isVirtualRegister(RegA)) - MRI.constrainRegClass(RegA, RC); - if (TargetRegisterInfo::isVirtualRegister(RegB)) - MRI.constrainRegClass(RegB, RC); - if (TargetRegisterInfo::isVirtualRegister(RegX)) - MRI.constrainRegClass(RegX, RC); - if (TargetRegisterInfo::isVirtualRegister(RegY)) - MRI.constrainRegClass(RegY, RC); - if (TargetRegisterInfo::isVirtualRegister(RegC)) - MRI.constrainRegClass(RegC, RC); - - // Create a new virtual register for the result of (X op Y) instead of - // recycling RegB because the MachineCombiner's computation of the critical - // path requires a new register definition rather than an existing one. - unsigned NewVR = MRI.createVirtualRegister(RC); - InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - - unsigned Opcode = Root.getOpcode(); - bool KillA = OpA.isKill(); - bool KillX = OpX.isKill(); - bool KillY = OpY.isKill(); - - // Create new instructions for insertion. - MachineInstrBuilder MIB1 = - BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) - .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); - InsInstrs.push_back(MIB1); - - MachineInstrBuilder MIB2 = - BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) - .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); - InsInstrs.push_back(MIB2); - - // Record old instructions for deletion. - DelInstrs.push_back(&Prev); - DelInstrs.push_back(&Root); -} - -void PPCInstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); - - // Select the previous instruction in the sequence based on the input pattern. - MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::MC_REASSOC_AX_BY: - case MachineCombinerPattern::MC_REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::MC_REASSOC_AX_YB: - case MachineCombinerPattern::MC_REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - } - assert(Prev && "Unknown pattern for machine combiner"); - - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); } // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -26,19 +26,6 @@ class X86RegisterInfo; class X86Subtarget; - namespace MachineCombinerPattern { - enum MC_PATTERN : int { - // These are commutative variants for reassociating a computation chain - // of the form: - // B = A op X (Prev) - // C = B op Y (Root) - MC_REASSOC_AX_BY = 0, - MC_REASSOC_AX_YB = 1, - MC_REASSOC_XA_BY = 2, - MC_REASSOC_XA_YB = 3, - }; - } // end namespace MachineCombinerPattern - namespace X86 { // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. @@ -451,26 +438,19 @@ const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const override; - bool useMachineCombiner() const override { return true; } - - /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in . All potential patterns are - /// output in the array. - bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &P) const override; - - /// When getMachineCombinerPatterns() finds a pattern, this function generates - /// the instructions that could replace the original code sequence. - void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const override; + + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + + bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const override; + + void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const override; /// analyzeCompare - For a comparison instruction, return the source registers /// in SrcReg and SrcReg2 if having two register operands, and the value it Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -6324,13 +6324,10 @@ return isHighLatencyDef(DefMI->getOpcode()); } -static bool hasReassociableOperands(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { +bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) && "Reassociation needs binary operators"); - const MachineOperand &Op1 = Inst.getOperand(1); - const MachineOperand &Op2 = Inst.getOperand(2); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); // Integer binary math/logic instructions have a third source operand: // the EFLAGS register. That operand must be both defined here and never @@ -6345,53 +6342,15 @@ if (!Inst.getOperand(3).isDead()) return false; } - - // We need virtual register definitions for the operands that we will - // reassociate. - MachineInstr *MI1 = nullptr; - MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) - MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) - MI2 = MRI.getUniqueVRegDef(Op2.getReg()); - - // And they need to be in the trace (otherwise, they won't have a depth). - if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB) - return true; - - return false; -} -static bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. - if (MI1->getOpcode() == AssocOpcode && - hasReassociableOperands(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; + return TargetInstrInfo::hasReassociableOperands(Inst, MBB); } // TODO: There are many more machine instruction opcodes to match: // 1. Other data types (integer, vectors) // 2. Other math / logic operations (xor, or) // 3. Other forms of the same operation (intrinsics and other variants) -static bool isAssociativeAndCommutative(const MachineInstr &Inst) { +bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { switch (Inst.getOpcode()) { case X86::AND8rr: case X86::AND16rr: @@ -6464,63 +6423,12 @@ } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst) && - hasReassociableOperands(Inst, Inst.getParent()) && - hasReassociableSibling(Inst, Commuted)) - return true; - - return false; -} - -// FIXME: This has the potential to be expensive (compile time) while not -// improving the code at all. Some ways to limit the overhead: -// 1. Track successful transforms; bail out if hit rate gets too low. -// 2. Only enable at -O3 or some other non-default optimization level. -// 3. Pre-screen pattern candidates here: if an operand of the previous -// instruction is known to not increase the critical path, then don't match -// that pattern. -bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root, - SmallVectorImpl &Patterns) const { - // TODO: There is nothing x86-specific here except the instruction type. - // This logic could be hoisted into the machine combiner pass itself. - - // Look for this reassociation pattern: - // B = A op X (Prev) - // C = B op Y (Root) - - bool Commute; - if (isReassociationCandidate(Root, Commute)) { - // We found a sequence of instructions that may be suitable for a - // reassociation of operands to increase ILP. Specify each commutation - // possibility for the Prev instruction in the sequence and let the - // machine combiner decide if changing the operands is worthwhile. - if (Commute) { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB); - } else { - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY); - Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY); - } - return true; - } - - return false; -} - /// This is an architecture-specific helper function of reassociateOps. /// Set special operand attributes for new instructions after reassociation. -static void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, - MachineInstr &NewMI1, MachineInstr &NewMI2) { +void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, + MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const { // Integer instructions define an implicit EFLAGS source register operand as // the third source (fourth total) operand. if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4) @@ -6528,7 +6436,7 @@ assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 && "Unexpected instruction type for reassociation"); - + MachineOperand &OldOp1 = OldMI1.getOperand(3); MachineOperand &OldOp2 = OldMI2.getOperand(3); MachineOperand &NewOp1 = NewMI1.getOperand(3); @@ -6555,111 +6463,6 @@ NewOp2.setIsDead(); } -/// Attempt the following reassociation to reduce critical path length: -/// B = A op X (Prev) -/// C = B op Y (Root) -/// ===> -/// B = X op Y -/// C = A op B -static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) { - MachineFunction *MF = Root.getParent()->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); - - // This array encodes the operand index for each parameter because the - // operands may be commuted. Each row corresponds to a pattern value, - // and each column specifies the index of A, B, X, Y. - unsigned OpIdx[4][4] = { - { 1, 1, 2, 2 }, - { 1, 2, 2, 1 }, - { 2, 1, 1, 2 }, - { 2, 2, 1, 1 } - }; - - MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]); - MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]); - MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); - MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); - MachineOperand &OpC = Root.getOperand(0); - - unsigned RegA = OpA.getReg(); - unsigned RegB = OpB.getReg(); - unsigned RegX = OpX.getReg(); - unsigned RegY = OpY.getReg(); - unsigned RegC = OpC.getReg(); - - if (TargetRegisterInfo::isVirtualRegister(RegA)) - MRI.constrainRegClass(RegA, RC); - if (TargetRegisterInfo::isVirtualRegister(RegB)) - MRI.constrainRegClass(RegB, RC); - if (TargetRegisterInfo::isVirtualRegister(RegX)) - MRI.constrainRegClass(RegX, RC); - if (TargetRegisterInfo::isVirtualRegister(RegY)) - MRI.constrainRegClass(RegY, RC); - if (TargetRegisterInfo::isVirtualRegister(RegC)) - MRI.constrainRegClass(RegC, RC); - - // Create a new virtual register for the result of (X op Y) instead of - // recycling RegB because the MachineCombiner's computation of the critical - // path requires a new register definition rather than an existing one. - unsigned NewVR = MRI.createVirtualRegister(RC); - InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); - - unsigned Opcode = Root.getOpcode(); - bool KillA = OpA.isKill(); - bool KillX = OpX.isKill(); - bool KillY = OpY.isKill(); - - // Create new instructions for insertion. - MachineInstrBuilder MIB1 = - BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) - .addReg(RegX, getKillRegState(KillX)) - .addReg(RegY, getKillRegState(KillY)); - MachineInstrBuilder MIB2 = - BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) - .addReg(RegA, getKillRegState(KillA)) - .addReg(NewVR, getKillRegState(true)); - - setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); - - // Record new instructions for insertion and old instructions for deletion. - InsInstrs.push_back(MIB1); - InsInstrs.push_back(MIB2); - DelInstrs.push_back(&Prev); - DelInstrs.push_back(&Root); -} - -void X86InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, - MachineCombinerPattern::MC_PATTERN Pattern, - SmallVectorImpl &InsInstrs, - SmallVectorImpl &DelInstrs, - DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); - - // Select the previous instruction in the sequence based on the input pattern. - MachineInstr *Prev = nullptr; - switch (Pattern) { - case MachineCombinerPattern::MC_REASSOC_AX_BY: - case MachineCombinerPattern::MC_REASSOC_XA_BY: - Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - break; - case MachineCombinerPattern::MC_REASSOC_AX_YB: - case MachineCombinerPattern::MC_REASSOC_XA_YB: - Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); - } - assert(Prev && "Unknown pattern for machine combiner"); - - reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); - return; -} - std::pair X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { return std::make_pair(TF, 0u);