Index: include/llvm/CodeGen/ReachingPhysDefs.h =================================================================== --- /dev/null +++ include/llvm/CodeGen/ReachingPhysDefs.h @@ -0,0 +1,204 @@ +//===- llvm/CodeGen/ReachingPhysDefs.h --- Reaching definitions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Reaching definitions of physical registers. +// +// This algorithm originated in the AArch64CollectLOHs pass. It was +// factored out so that targets could start to use a common class for +// reaching definitions. More specifically, SystemZ::ElimCompare.cpp +// was the initial reason for trying this. It is the hope and ambition +// this will evolve into a full fledged post-ra def/use analalyzer +// that suits the needs of all targets. + +#ifndef LLVM_CODEGEN_REACHINGDEFS_H +#define LLVM_CODEGEN_REACHINGDEFS_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +using namespace llvm; + +namespace llvm { + +/// A set of MachineInstruction. Const is not used as it is allowed +/// for users of ReachingPhysDefs to transform instructions in maps. +struct SetOfMachineInstr : SetVector { + void dump(); +}; + +/// Map a basic block to a set of instructions per register. +/// This is used to represent the exposed uses of a basic block +/// per register. +typedef MapVector> + BlockToSetOfInstrsPerColor; + +/// Map a basic block to an instruction per register. +/// This is used to represent the live-out definitions of a basic block +/// per register. +typedef MapVector> + BlockToInstrPerColor; + +/// Map an instruction to a set of instructions. Used to represent the +/// mapping def to reachable uses or use to definitions. +typedef MapVector InstrToInstrs; + +/// Map a basic block to a BitVector. +/// This is used to record the kill registers per basic block. +typedef MapVector BlockToRegSet; + +/// Map a register to a dense id. +typedef DenseMap MapRegToId; + +/// Map a dense id to a register. Used for debug purposes. +typedef SmallVector MapIdToReg; + +class ReachingPhysDefs { +protected: + MachineFunction *MF; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + /// Name for this map, in debug outputs. + std::string Name; + + /// If true, only a local analysis is done. + bool BasicBlockScopeOnly; + + /// Dummy uses are recorded using this MI, during local analysis. + MachineInstr *DummyOp; + + /// structures: + /// For each basic block. + /// Out: a set per color of definitions that reach the + /// out boundary of this block. + /// In: Same as Out but for in boundary. + /// Gen: generated color in this block (one operation per color). + /// Kill: register set of killed color in this block. + /// ReachableUses: a set per color of uses (operation) reachable + /// for "In" definitions. + BlockToSetOfInstrsPerColor Out, In, ReachableUses; + BlockToInstrPerColor Gen; + BlockToRegSet Kill; + + /// Given a couple (MBB, reg) get the corresponding set of + /// instructions from the given "sets". + SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, + const MachineBasicBlock &MBB, unsigned reg); + + /// Given a couple (reg, MI) get the corresponding set of + /// instructions from the the given "sets". + SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, + MachineInstr &MI); + + /// Same as getUses but does not modify the input map "sets". + /// \return NULL if the couple (reg, MI) is not in sets. + const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, + MachineInstr &MI); + + //// Virtual methods that make it possible to override common analysis + //// results in a a specialized transformation context. + + /// Process the use operands of/MI. + virtual void processMIUses(MachineInstr &MI); + /// Given MO, return a used register if any. + virtual unsigned processMIUseMO(MachineOperand &MO); + /// Process MI clobbsers (regmask) + virtual void processMIClobbers(MachineInstr &MI); + /// Process the defs of MI. + virtual void processMIDefs(MachineInstr &MI); + + /// Indicate if an MI that clobbers a reg should be inserted into + /// maps. + virtual bool rememberClobberingMI() { return false; } + + /// Indicate if dummy uses should be added to DummyOp in case only + /// doing local analysis. + virtual bool recordDummyUses() { return true; } + + /// Initialize the reaching definition algorithm. + void initReachingDef(); + + /// Reaching definition algorithm. + void reachingDef(); + + /// Reaching def core algorithm. + void reachingDefAlgorithm(); + + /// Build the inverse map from Use to Defs. + virtual void reachedUsesToDefs(); + + // Register numbers are not stored directly in maps but are mapped + // to id's that start from 0. + + /// Current ID, to be assigned to next reg added. + unsigned CurRegId; + /// Map from reg to its id. + MapRegToId RegToId; + /// Map from id to reg. + MapIdToReg IdToReg; + + ///// The resulting data structures + + /// A two dimensional map from [reg][DefMI] -> {Uses} + InstrToInstrs *ColorOpToReachedUses; + /// A two dimensional map from [reg][UseMI] -> {Defs} + InstrToInstrs *ColorOpToReachingDefs; + /// A map from [UseMI] -> {All DefMIs of any used reg of UseMI} + InstrToInstrs UseToDefs; + +public: + ReachingPhysDefs(MachineFunction *MF_, const TargetRegisterInfo *TRI_, + bool BasicBlockScopeOnly, + const Twine &name = "Reaching defs"); + ~ReachingPhysDefs(); + + /// Add reg to the set of registers to be handled by the algorithm. + void addReg(unsigned reg); + + /// Add all target registers to be handled by the algorithm. + void addAllTargetRegs(); + + /// Return number of regs added to maps. + unsigned nbRegs() { return RegToId.size(); } + + /// Return true if reg has already been added. + bool isRegInMaps(unsigned reg) { + return (RegToId.find(reg) != RegToId.end()); + } + + /// Do a reaching defs analyzis on a given set of phys regs. If a + /// single reg is given, it is added. + bool analyze(unsigned singleReg = 0); + + /// Some transformations rather leave things alone if a use of a reg + /// have multiple defs. This method removes all defs with such a + /// user from the ColorOpToReachedUses map, for the register + /// involved. + void pruneMultipleDefs(); + +#ifndef NDEBUG + /// print the result of the reaching definition algorithm. + void dump(); +#endif + + /// Return the uses of MIs' definition of reg. + SetOfMachineInstr *getUses(unsigned reg, MachineInstr &MI); +}; + +} // end llvm namespace. + +#endif Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -93,6 +93,7 @@ ProcessImplicitDefs.cpp PrologEpilogInserter.cpp PseudoSourceValue.cpp + ReachingPhysDefs.cpp RegAllocBase.cpp RegAllocBasic.cpp RegAllocFast.cpp Index: lib/CodeGen/ReachingPhysDefs.cpp =================================================================== --- /dev/null +++ lib/CodeGen/ReachingPhysDefs.cpp @@ -0,0 +1,377 @@ +//===- llvm/CodeGen/ReachingPhysDefs.h --- Reaching definitions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ReachingPhysDefs.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "reachingdefs" + +static void dumpMIWithBB(const MachineInstr *MI) { + dbgs() << "MBB#" << MI->getParent()->getNumber() + << ":"; MI->dump(); +} + +void SetOfMachineInstr::dump() { + for (auto *I : *this) { + dbgs() << "\t"; dumpMIWithBB(I); + } + dbgs() << "\n"; +} + +/// the given "sets". +/// If this couple does not reference any set, an empty set is added to "sets" +/// for this couple and returned. +SetOfMachineInstr &ReachingPhysDefs:: +getSet(BlockToSetOfInstrsPerColor &sets, const MachineBasicBlock &MBB, + unsigned reg) { + SetOfMachineInstr *result; + BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB); + if (it != sets.end()) + result = it->second.get(); + else + result = (sets[&MBB] = make_unique(nbRegs())).get(); + + return result[reg]; +} + +/// Given a couple (reg, MI) get the corresponding set of instructions +/// from the the given "sets". This is used to get the uses record in +/// sets of a definition identified by MI and reg, i.e., MI defines +/// reg. If the couple does not reference anything, an empty set is +/// added to "sets[reg]". +SetOfMachineInstr &ReachingPhysDefs::getUses(InstrToInstrs *sets, unsigned reg, + MachineInstr &MI) { + return sets[reg][&MI]; +} + +/// Same as getUses but does not modify the input map "sets". +/// \return NULL if the couple (reg, MI) is not in sets. +const SetOfMachineInstr *ReachingPhysDefs:: +getUses(const InstrToInstrs *sets, unsigned reg, MachineInstr &MI) { + InstrToInstrs::const_iterator Res = sets[reg].find(&MI); + if (Res != sets[reg].end()) + return &(Res->second); + return nullptr; +} + +SetOfMachineInstr *ReachingPhysDefs:: +getUses(unsigned reg, MachineInstr &MI) { + return &getUses(ColorOpToReachedUses, RegToId.find(reg)->second, MI); +} + +ReachingPhysDefs:: +ReachingPhysDefs(MachineFunction *MF_, const TargetRegisterInfo *TRI_, + bool BBScopeOnly, const Twine &name) + : MF(MF_), TRI(TRI_), TII(nullptr), Name(name.str()), + BasicBlockScopeOnly(BBScopeOnly), DummyOp(nullptr), CurRegId(0), + ColorOpToReachedUses(nullptr), ColorOpToReachingDefs(nullptr) +{ + TII = MF->getSubtarget().getInstrInfo(); +} + +ReachingPhysDefs::~ReachingPhysDefs() { + if (ColorOpToReachedUses != nullptr) + delete[] ColorOpToReachedUses; + if (ColorOpToReachingDefs != nullptr) + delete[] ColorOpToReachingDefs; + if (DummyOp != nullptr) + MF->DeleteMachineInstr(DummyOp); +} + +void ReachingPhysDefs:: +pruneMultipleDefs() { + for (unsigned CurReg = 0; CurReg < nbRegs(); ++CurReg) { + bool change = true; + while (change) { + change = false; + for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { + SetOfMachineInstr toPrune; + for (MachineInstr *Use : DefsIt.second) + if (ColorOpToReachingDefs[CurReg][Use].size() > 1) { + DEBUG(dbgs() << "Found user with multiple defs (" + << PrintReg(IdToReg[CurReg], TRI) << "):\n"; + dumpMIWithBB(Use); + ColorOpToReachingDefs[CurReg][Use].dump();); + toPrune.insert(ColorOpToReachingDefs[CurReg][Use].begin(), + ColorOpToReachingDefs[CurReg][Use].end()); + break; + } + + if (!toPrune.empty()) { + for (MachineInstr *Def : toPrune) { + DEBUG(dbgs() << "Pruning reaching def:\n"; Def->dump(); + ColorOpToReachedUses[CurReg][Def].dump()); + ColorOpToReachedUses[CurReg].erase(Def); + } + change = true; + break; + } + } + } + } +} + +bool ReachingPhysDefs::analyze(unsigned singleReg) { + if (singleReg) + addReg(singleReg); + + if (!nbRegs()) + return false; + + assert(ColorOpToReachedUses == nullptr && "Don't call analyze twice!"); + ColorOpToReachedUses = new InstrToInstrs[nbRegs()]; + ColorOpToReachingDefs = new InstrToInstrs[nbRegs()]; + if (BasicBlockScopeOnly) { + // For local analysis, create a dummy operation to record uses + // that are not local. + DummyOp = MF->CreateMachineInstr(TII->get(TargetOpcode::COPY), DebugLoc()); + } + + // Compute the reaching defs. + reachingDef(); + + // Translate the definition to uses map into a use to definitions map. + reachedUsesToDefs(); + + return true; +} + +/// Map registers with dense id in RegToId and vice-versa in +/// IdToReg. IdToReg is populated only in DEBUG mode. +void ReachingPhysDefs::addReg(unsigned reg) { + DEBUG(IdToReg.push_back(reg); + assert(IdToReg[CurRegId] == reg && + "Reg index mismatches insertion index.")); + RegToId[reg] = CurRegId++; +} + +/// Map registers with dense id in RegToId and vice-versa in +/// IdToReg. IdToReg is populated only in DEBUG mode. +void ReachingPhysDefs::addAllTargetRegs() { + CurRegId = 0; + unsigned NbReg = TRI->getNumRegs(); + for (; CurRegId < NbReg; ++CurRegId) { + RegToId[CurRegId] = CurRegId; + DEBUG(IdToReg.push_back(CurRegId);); + } +} + +/// Initialize the reaching definition algorithm: +/// For each basic block BB in MF, record: +/// - its kill set. +/// - its reachable uses (uses that are exposed to BB's predecessors). +/// - its generated definitions. +void ReachingPhysDefs::initReachingDef() { + for (MachineBasicBlock &MBB : *MF) { + auto &BBGen = Gen[&MBB]; + BBGen = make_unique(nbRegs()); + std::fill(BBGen.get(), BBGen.get() + nbRegs(), nullptr); + + BitVector &BBKillSet = Kill[&MBB]; + BBKillSet.resize(nbRegs()); + + for (MachineInstr &MI : MBB) { + // Process uses first. + processMIUses(MI); + processMIClobbers(MI); + processMIDefs(MI); + } + + // If we restrict our analysis to basic block scope, + // conservatively add a dummy use for each generated value. + if (recordDummyUses() && DummyOp && !MBB.succ_empty()) + for (unsigned CurReg = 0; CurReg < nbRegs(); ++CurReg) + if (BBGen[CurReg]) + getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp); + } +} + +unsigned ReachingPhysDefs::processMIUseMO(MachineOperand &MO) { + if (!MO.isReg() || !MO.isUse()) + return 0; + return MO.getReg(); +} + +void ReachingPhysDefs::processMIUses(MachineInstr &MI) { + const MachineBasicBlock *MBB = MI.getParent(); + auto &BBGen = Gen[MBB]; + BitVector &BBKillSet = Kill[MBB]; + + for (MachineOperand &MO : MI.operands()) { + unsigned CurReg = processMIUseMO(MO); + if (!CurReg) + continue; + MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); + if (ItCurRegId == RegToId.end()) + continue; + CurReg = ItCurRegId->second; + + // if CurReg has not been defined, this use is reachable. + if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) + getSet(ReachableUses, *MBB, CurReg).insert(&MI); + // current basic block definition for this color, if any, is in Gen. + if (BBGen[CurReg]) + getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); + } +} + +// Process clobbers. +void ReachingPhysDefs::processMIClobbers(MachineInstr &MI) { + const MachineBasicBlock *MBB = MI.getParent(); + auto &BBGen = Gen[MBB]; + BitVector &BBKillSet = Kill[MBB]; + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + // Clobbers kill the related colors. + const uint32_t *PreservedRegs = MO.getRegMask(); + + // Set generated regs. + for (const auto &Entry : RegToId) { + unsigned Reg = Entry.second; + // Use the global register ID when querying APIs external to this + // pass. + if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { + // Do not register clobbered definition - this definition + // is not used anyway (otherwise register allocation is + // wrong). + BBGen[Reg] = rememberClobberingMI() ? &MI : nullptr; + BBKillSet.set(Reg); + } + } + } +} + +// Process register defs. +void ReachingPhysDefs::processMIDefs(MachineInstr &MI) { + const MachineBasicBlock *MBB = MI.getParent(); + auto &BBGen = Gen[MBB]; + BitVector &BBKillSet = Kill[MBB]; + + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned CurReg = MO.getReg(); + MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); + if (ItCurRegId == RegToId.end()) + continue; + + for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { + MapRegToId::const_iterator ItRegId = RegToId.find(*AI); + // If this alias has not been recorded, then it is not interesting + // for the current analysis. XXX True generally? + // We can end up in this situation because of tuple registers. + // E.g., Let say we are interested in S1. When we register + // S1, we will also register its aliases and in particular + // the tuple Q1_Q2. + // Now, when we encounter Q1_Q2, we will look through its aliases + // and will find that S2 is not registered. + if (ItRegId == RegToId.end()) + continue; + + BBKillSet.set(ItRegId->second); + BBGen[ItRegId->second] = &MI; + } + BBGen[ItCurRegId->second] = &MI; + } +} + +/// Reaching def core algorithm: +/// while an Out has changed +/// for each bb +/// for each color +/// In[bb][color] = U Out[bb.predecessors][color] +/// insert reachableUses[bb][color] in each in[bb][color] +/// op.reachedUses +/// +/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) +void ReachingPhysDefs::reachingDefAlgorithm() { + bool HasChanged; + do { + HasChanged = false; + for (const MachineBasicBlock &MBB : *MF) { + unsigned CurReg; + for (CurReg = 0; CurReg < nbRegs(); ++CurReg) { + SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg); + SetOfMachineInstr &BBReachableUses = + getSet(ReachableUses, MBB, CurReg); + SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg); + unsigned Size = BBOutSet.size(); + // In[bb][color] = U Out[bb.predecessors][color] + for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { + SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg); + BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); + } + // insert reachableUses[bb][color] in each in[bb][color] op.reachedses + for (MachineInstr *MI : BBInSet) { + SetOfMachineInstr &OpReachedUses = + getUses(ColorOpToReachedUses, CurReg, *MI); + OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); + } + // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) + if (!Kill[&MBB].test(CurReg)) + BBOutSet.insert(BBInSet.begin(), BBInSet.end()); + if (Gen[&MBB][CurReg]) + BBOutSet.insert(Gen[&MBB][CurReg]); + HasChanged |= BBOutSet.size() != Size; + } + } + } while (HasChanged); +} + + +/// Reaching definition algorithm. +/// On completion, ColorOpToReachedUses will contain the result of the +/// reaching def algorithm. +void ReachingPhysDefs::reachingDef() { + + // Initialize Gen, kill and reachableUses. + initReachingDef(); + + // Algo. + if (!DummyOp) + reachingDefAlgorithm(); +} + +/// Build the inverse maps from Use to Defs. +void ReachingPhysDefs::reachedUsesToDefs() { + for (unsigned CurReg = 0; CurReg < nbRegs(); ++CurReg) { + for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { + MachineInstr *Def = DefsIt.first; + for (MachineInstr *Use : DefsIt.second) { + UseToDefs[Use].insert(Def); + ColorOpToReachingDefs[CurReg][Use].insert(Def); + } + } + } +} + +#ifndef NDEBUG +/// print the result of the reaching definition algorithm. +void ReachingPhysDefs::dump() { + DEBUG(dbgs() << Name << ":\n"); + unsigned CurReg; + for (CurReg = 0; CurReg < nbRegs(); ++CurReg) { + if (ColorOpToReachedUses[CurReg].empty()) + continue; + DEBUG(dbgs() << "Reg " << PrintReg(IdToReg[CurReg], TRI) << " \n"); + for (auto &DefsIt : ColorOpToReachedUses[CurReg]) { + DEBUG(dumpMIWithBB(DefsIt.first); + DefsIt.second.dump();); + } + } +} +#endif // NDEBUG + Index: lib/Target/AArch64/AArch64CollectLOH.cpp =================================================================== --- lib/Target/AArch64/AArch64CollectLOH.cpp +++ lib/Target/AArch64/AArch64CollectLOH.cpp @@ -114,6 +114,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/ReachingPhysDefs.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -123,7 +124,7 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -#define DEBUG_TYPE "aarch64-collect-loh" +#define DEBUG_TYPE "reachingdefs" static cl::opt PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, @@ -192,31 +193,7 @@ private: }; -/// A set of MachineInstruction. -typedef SetVector SetOfMachineInstr; -/// Map a basic block to a set of instructions per register. -/// This is used to represent the exposed uses of a basic block -/// per register. -typedef MapVector> -BlockToSetOfInstrsPerColor; -/// Map a basic block to an instruction per register. -/// This is used to represent the live-out definitions of a basic block -/// per register. -typedef MapVector> -BlockToInstrPerColor; -/// Map an instruction to a set of instructions. Used to represent the -/// mapping def to reachable uses or use to definitions. -typedef MapVector InstrToInstrs; -/// Map a basic block to a BitVector. -/// This is used to record the kill registers per basic block. -typedef MapVector BlockToRegSet; - -/// Map a register to a dense id. -typedef DenseMap MapRegToId; -/// Map a dense id to a register. Used for debug purposes. -typedef SmallVector MapIdToReg; + } // end anonymous namespace. char AArch64CollectLOH::ID = 0; @@ -227,269 +204,41 @@ INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", AARCH64_COLLECT_LOH_NAME, false, false) -/// Given a couple (MBB, reg) get the corresponding set of instruction from -/// the given "sets". -/// If this couple does not reference any set, an empty set is added to "sets" -/// for this couple and returned. -/// \param nbRegs is used internally allocate some memory. It must be consistent -/// with the way sets is used. -static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, - const MachineBasicBlock &MBB, unsigned reg, - unsigned nbRegs) { - SetOfMachineInstr *result; - BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB); - if (it != sets.end()) - result = it->second.get(); - else - result = (sets[&MBB] = make_unique(nbRegs)).get(); - - return result[reg]; -} - -/// Given a couple (reg, MI) get the corresponding set of instructions from the -/// the given "sets". -/// This is used to get the uses record in sets of a definition identified by -/// MI and reg, i.e., MI defines reg. -/// If the couple does not reference anything, an empty set is added to -/// "sets[reg]". -/// \pre set[reg] is valid. -static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, - const MachineInstr &MI) { - return sets[reg][&MI]; -} - -/// Same as getUses but does not modify the input map: sets. -/// \return NULL if the couple (reg, MI) is not in sets. -static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, - const MachineInstr &MI) { - InstrToInstrs::const_iterator Res = sets[reg].find(&MI); - if (Res != sets[reg].end()) - return &(Res->second); - return nullptr; -} - -/// Initialize the reaching definition algorithm: -/// For each basic block BB in MF, record: -/// - its kill set. -/// - its reachable uses (uses that are exposed to BB's predecessors). -/// - its the generated definitions. -/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to -/// the list of uses of exposed defintions. -/// \param ADRPMode specifies to only consider ADRP instructions for generated -/// definition. It also consider definitions of ADRP instructions as uses and -/// ignore other uses. The ADRPMode is used to collect the information for LHO -/// that involve ADRP operation only. -static void initReachingDef(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - const MapRegToId &RegToId, - const MachineInstr *DummyOp, bool ADRPMode) { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned NbReg = RegToId.size(); - - for (const MachineBasicBlock &MBB : MF) { - auto &BBGen = Gen[&MBB]; - BBGen = make_unique(NbReg); - std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr); - - BitVector &BBKillSet = Kill[&MBB]; - BBKillSet.resize(NbReg); - for (const MachineInstr &MI : MBB) { - bool IsADRP = MI.getOpcode() == AArch64::ADRP; - - // Process uses first. - if (IsADRP || !ADRPMode) - for (const MachineOperand &MO : MI.operands()) { - // Treat ADRP def as use, as the goal of the analysis is to find - // ADRP defs reached by other ADRP defs. - if (!MO.isReg() || (!ADRPMode && !MO.isUse()) || - (ADRPMode && (!IsADRP || !MO.isDef()))) - continue; - unsigned CurReg = MO.getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - CurReg = ItCurRegId->second; - - // if CurReg has not been defined, this use is reachable. - if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) - getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI); - // current basic block definition for this color, if any, is in Gen. - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); - } - - // Process clobbers. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isRegMask()) - continue; - // Clobbers kill the related colors. - const uint32_t *PreservedRegs = MO.getRegMask(); - - // Set generated regs. - for (const auto &Entry : RegToId) { - unsigned Reg = Entry.second; - // Use the global register ID when querying APIs external to this - // pass. - if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { - // Do not register clobbered definition for no ADRP. - // This definition is not used anyway (otherwise register - // allocation is wrong). - BBGen[Reg] = ADRPMode ? &MI : nullptr; - BBKillSet.set(Reg); - } - } - } - - // Process register defs. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned CurReg = MO.getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { - MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - // If this alias has not been recorded, then it is not interesting - // for the current analysis. - // We can end up in this situation because of tuple registers. - // E.g., Let say we are interested in S1. When we register - // S1, we will also register its aliases and in particular - // the tuple Q1_Q2. - // Now, when we encounter Q1_Q2, we will look through its aliases - // and will find that S2 is not registered. - if (ItRegId == RegToId.end()) - continue; - - BBKillSet.set(ItRegId->second); - BBGen[ItRegId->second] = &MI; - } - BBGen[ItCurRegId->second] = &MI; - } - } - // If we restrict our analysis to basic block scope, conservatively add a - // dummy - // use for each generated value. - if (!ADRPMode && DummyOp && !MBB.succ_empty()) - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp); - } -} +// A class to compute the reaching def in ADRP mode, meaning ADRP +// definitions are first considered as uses. +class ReachingPhysDefs_ADRP : public ReachingPhysDefs { + unsigned processMIUseMO(MachineOperand &MO) override; + bool rememberClobberingMI() override { return true; } + void reachedUsesToDefs() override; + bool recordDummyUses() override { return false; } + +public: + ReachingPhysDefs_ADRP(MachineFunction *MF_, const TargetRegisterInfo *TRI_, + bool BasicBlockScopeOnly) + : ReachingPhysDefs(MF_, TRI_, BasicBlockScopeOnly, + "ADRP reaching defs") {} + void addRegs(); + InstrToInstrs &getUseToDefs() { return UseToDefs; } +}; -/// Reaching def core algorithm: -/// while an Out has changed -/// for each bb -/// for each color -/// In[bb][color] = U Out[bb.predecessors][color] -/// insert reachableUses[bb][color] in each in[bb][color] -/// op.reachedUses -/// -/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - unsigned NbReg) { - bool HasChanged; - do { - HasChanged = false; - for (const MachineBasicBlock &MBB : MF) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); - SetOfMachineInstr &BBReachableUses = - getSet(ReachableUses, MBB, CurReg, NbReg); - SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); - unsigned Size = BBOutSet.size(); - // In[bb][color] = U Out[bb.predecessors][color] - for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { - SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); - BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); - } - // insert reachableUses[bb][color] in each in[bb][color] op.reachedses - for (const MachineInstr *MI : BBInSet) { - SetOfMachineInstr &OpReachedUses = - getUses(ColorOpToReachedUses, CurReg, *MI); - OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); - } - // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) - if (!Kill[&MBB].test(CurReg)) - BBOutSet.insert(BBInSet.begin(), BBInSet.end()); - if (Gen[&MBB][CurReg]) - BBOutSet.insert(Gen[&MBB][CurReg]); - HasChanged |= BBOutSet.size() != Size; - } - } - } while (HasChanged); -} +class ReachingPhysDefs_Others : public ReachingPhysDefs { + void reachedUsesToDefs() override; -/// Reaching definition algorithm. -/// \param MF function on which the algorithm will operate. -/// \param[out] ColorOpToReachedUses will contain the result of the reaching -/// def algorithm. -/// \param ADRPMode specify whether the reaching def algorithm should be tuned -/// for ADRP optimization. \see initReachingDef for more details. -/// \param DummyOp if not NULL, the algorithm will work at -/// basic block scope and will set for every exposed definition a use to -/// @p DummyOp. -/// \pre ColorOpToReachedUses is an array of at least number of registers of -/// InstrToInstrs. -static void reachingDef(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, bool ADRPMode = false, - const MachineInstr *DummyOp = nullptr) { - // structures: - // For each basic block. - // Out: a set per color of definitions that reach the - // out boundary of this block. - // In: Same as Out but for in boundary. - // Gen: generated color in this block (one operation per color). - // Kill: register set of killed color in this block. - // ReachableUses: a set per color of uses (operation) reachable - // for "In" definitions. - BlockToSetOfInstrsPerColor Out, In, ReachableUses; - BlockToInstrPerColor Gen; - BlockToRegSet Kill; - - // Initialize Gen, kill and reachableUses. - initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, - DummyOp, ADRPMode); - - // Algo. - if (!DummyOp) - reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, - ReachableUses, RegToId.size()); -} +public: + ReachingPhysDefs_Others(MachineFunction *MF_, const TargetRegisterInfo *TRI_, + bool BasicBlockScopeOnly) + : ReachingPhysDefs(MF_, TRI_, BasicBlockScopeOnly, + "All reaching defs") {} + void addRegs(); -#ifndef NDEBUG -/// print the result of the reaching definition algorithm. -static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, - unsigned NbReg, const TargetRegisterInfo *TRI, - const MapIdToReg &IdToReg) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - if (ColorOpToReachedUses[CurReg].empty()) - continue; - DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); + InstrToInstrs &getUseToDefs() { return UseToDefs; } - for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { - DEBUG(dbgs() << "Def:\n"); - DEBUG(DefsIt.first->print(dbgs())); - DEBUG(dbgs() << "Reachable uses:\n"); - for (const MachineInstr *MI : DefsIt.second) { - DEBUG(MI->print(dbgs())); - } - } + /// Returns the first definition in UseToDefs for MI. + MachineInstr *getDef(MachineInstr *MI) { + return *UseToDefs.find(MI)->second.begin(); } -} -#endif // NDEBUG +}; /// Answer the following question: Can Def be one of the definition /// involved in a part of a LOH? @@ -525,6 +274,32 @@ return false; } +/// Check whether the given instruction can be the end of a LOH chain +/// involving a load. +static bool isCandidateLoad(const MachineInstr *Instr) { + switch (Instr->getOpcode()) { + default: + return false; + case AArch64::LDRSBWui: + case AArch64::LDRSBXui: + case AArch64::LDRSHWui: + case AArch64::LDRSHXui: + case AArch64::LDRSWui: + case AArch64::LDRBui: + case AArch64::LDRHui: + case AArch64::LDRWui: + case AArch64::LDRXui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) + return false; + return true; + } + // Unreachable. + return false; +} + /// Check whether the given instruction can the end of a LOH chain involving a /// store. static bool isCandidateStore(const MachineInstr *Instr) { @@ -549,17 +324,98 @@ return false; } -/// Given the result of a reaching definition algorithm in ColorOpToReachedUses, -/// Build the Use to Defs information and filter out obvious non-LOH candidates. -/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. -/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, -/// i.e., no simple chain. -/// \param ADRPMode -- \see initReachingDef. -static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, - const InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, - bool ADRPMode = false) { +/// Look for every register defined by potential LOHs candidates. +static void collectLOHInvolvedRegs(ReachingPhysDefs &ReachingDefs, + MachineFunction *MF) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + + DEBUG(dbgs() << "** Collect Involved Register\n"); + for (const auto &MBB : *MF) { + for (const MachineInstr &MI : MBB) { + if (!canDefBePartOfLOH(&MI) && + !isCandidateLoad(&MI) && !isCandidateStore(&MI)) + continue; + + // Process defs + for (MachineInstr::const_mop_iterator IO = MI.operands_begin(), + IOEnd = MI.operands_end(); + IO != IOEnd; ++IO) { + if (!IO->isReg() || !IO->isDef()) + continue; + unsigned CurReg = IO->getReg(); + for (MCRegAliasIterator AI(CurReg, TRI, true); + AI.isValid(); ++AI) + if (!ReachingDefs.isRegInMaps(*AI)) { + ReachingDefs.addReg(*AI); + DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) + << '\n'); + } + } + } + } +} + +void ReachingPhysDefs_ADRP::addRegs() { + if (!PreCollectRegister) { + ReachingPhysDefs::addAllTargetRegs(); + return; + } + + collectLOHInvolvedRegs(*this, MF); +} + +unsigned ReachingPhysDefs_ADRP::processMIUseMO(MachineOperand &MO) { + // Only handle ADRP instructions, and treat ADRP def as use, as the + // goal of the analysis is to find ADRP defs reached by other ADRP + // defs. + if (MO.getParent()->getOpcode() != AArch64::ADRP || !MO.isReg() || !MO.isDef()) + return 0; + return MO.getReg(); +} + +void ReachingPhysDefs_ADRP::reachedUsesToDefs() { + SetOfMachineInstr NotCandidate; + MapRegToId::const_iterator EndIt = RegToId.end(); + for (unsigned CurReg = 0; CurReg < nbRegs(); ++CurReg) { + // If this color is never defined, continue. + if (ColorOpToReachedUses[CurReg].empty()) + continue; + + for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { + for (MachineInstr *MI : DefsIt.second) { + const MachineInstr *Def = DefsIt.first; + MapRegToId::const_iterator It; + // if all the reaching defs are not adrp, this use will not be + // simplifiable. + if (Def->getOpcode() != AArch64::ADRP) { + NotCandidate.insert(MI); + continue; + } + // Do not consider self reaching as a simplifiable case for ADRP. + if (MI != DefsIt.first) + UseToDefs[MI].insert(DefsIt.first); + } + } + } + for (MachineInstr *Elem : NotCandidate) { + DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); + // It would have been better if we could just remove the entry + // from the map. Because of that, we have to filter the garbage + // (second.empty) in the subsequence analysis. + UseToDefs[Elem].clear(); + } +} + +void ReachingPhysDefs_Others::addRegs() { + if (!PreCollectRegister) { + ReachingPhysDefs::addAllTargetRegs(); + return; + } + + collectLOHInvolvedRegs(*this, MF); +} +void ReachingPhysDefs_Others::reachedUsesToDefs() { SetOfMachineInstr NotCandidate; unsigned NbReg = RegToId.size(); MapRegToId::const_iterator EndIt = RegToId.end(); @@ -569,14 +425,13 @@ continue; for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { - for (const MachineInstr *MI : DefsIt.second) { - const MachineInstr *Def = DefsIt.first; + for (MachineInstr *MI : DefsIt.second) { + MachineInstr *Def = DefsIt.first; MapRegToId::const_iterator It; // if all the reaching defs are not adrp, this use will not be // simplifiable. - if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || - (!ADRPMode && !canDefBePartOfLOH(Def)) || - (!ADRPMode && isCandidateStore(MI) && + if (!canDefBePartOfLOH(Def) || + (isCandidateStore(MI) && // store are LOH candidate iff the end of the chain is used as // base. ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt || @@ -584,33 +439,32 @@ NotCandidate.insert(MI); continue; } - // Do not consider self reaching as a simplifiable case for ADRP. - if (!ADRPMode || MI != DefsIt.first) { - UseToReachingDefs[MI].insert(DefsIt.first); - // If UsesIt has several reaching definitions, it is not - // candidate for simplificaton in non-ADRPMode. - if (!ADRPMode && UseToReachingDefs[MI].size() > 1) - NotCandidate.insert(MI); - } + UseToDefs[MI].insert(DefsIt.first); + // If UsesIt has several reaching definitions, it is not + // candidate for simplificaton in non-ADRPMode. + if (UseToDefs[MI].size() > 1) + NotCandidate.insert(MI); } } } - for (const MachineInstr *Elem : NotCandidate) { + for (MachineInstr *Elem : NotCandidate) { DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); // It would have been better if we could just remove the entry // from the map. Because of that, we have to filter the garbage // (second.empty) in the subsequence analysis. - UseToReachingDefs[Elem].clear(); + UseToDefs[Elem].clear(); } } + + /// Based on the use to defs information (in ADRPMode), compute the /// opportunities of LOH ADRP-related. -static void computeADRP(const InstrToInstrs &UseToDefs, +static void computeADRP(ReachingPhysDefs_ADRP &ReachingDefs_ADRP, AArch64FunctionInfo &AArch64FI, const MachineDominatorTree *MDT) { DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); - for (const auto &Entry : UseToDefs) { + for (const auto &Entry : ReachingDefs_ADRP.getUseToDefs()) { unsigned Size = Entry.second.size(); if (Size == 0) continue; @@ -642,32 +496,6 @@ } } -/// Check whether the given instruction can be the end of a LOH chain -/// involving a load. -static bool isCandidateLoad(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case AArch64::LDRSBWui: - case AArch64::LDRSBXui: - case AArch64::LDRSHWui: - case AArch64::LDRSHXui: - case AArch64::LDRSWui: - case AArch64::LDRBui: - case AArch64::LDRHui: - case AArch64::LDRWui: - case AArch64::LDRXui: - case AArch64::LDRSui: - case AArch64::LDRDui: - case AArch64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) - return false; - return true; - } - // Unreachable. - return false; -} - /// Check whether the given instruction can load a litteral. static bool supportLoadFromLiteral(const MachineInstr *Instr) { switch (Instr->getOpcode()) { @@ -690,13 +518,13 @@ /// chain. /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are /// already been filtered out. -static bool isCandidate(const MachineInstr *Instr, +static bool isCandidate(MachineInstr *Instr, const InstrToInstrs &UseToDefs, const MachineDominatorTree *MDT) { if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) return false; - const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); + MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); if (Def->getOpcode() != AArch64::ADRP) { // At this point, Def is ADDXri or LDRXui of the right type of // symbol, because we filtered out the uses that were not defined @@ -725,12 +553,10 @@ return false; } -static bool registerADRCandidate(const MachineInstr &Use, - const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, +static bool registerADRCandidate(MachineInstr &Use, + ReachingPhysDefs_Others &ReachingDefs, AArch64FunctionInfo &AArch64FI, - SetOfMachineInstr *InvolvedInLOHs, - const MapRegToId &RegToId) { + SetOfMachineInstr *InvolvedInLOHs) { // Look for opportunities to turn ADRP -> ADD or // ADRP -> LDR GOTPAGEOFF into ADR. // If ADRP has more than one use. Give up. @@ -738,17 +564,16 @@ (Use.getOpcode() != AArch64::LDRXui || !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) return false; - InstrToInstrs::const_iterator It = UseToDefs.find(&Use); + InstrToInstrs::const_iterator It = ReachingDefs.getUseToDefs().find(&Use); // The map may contain garbage that we need to ignore. - if (It == UseToDefs.end() || It->second.empty()) + if (It == ReachingDefs.getUseToDefs().end() || It->second.empty()) return false; - const MachineInstr &Def = **It->second.begin(); + MachineInstr &Def = **It->second.begin(); if (Def.getOpcode() != AArch64::ADRP) return false; // Check the number of users of ADRP. const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def.getOperand(0).getReg())->second, Def); + ReachingDefs.getUses(Def.getOperand(0).getReg(), Def); if (Users->size() > 1) { ++NumADRComplexCandidate; return false; @@ -772,9 +597,8 @@ /// Based on the use to defs information (in non-ADRPMode), compute the /// opportunities of LOH non-ADRP-related -static void computeOthers(const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, +static void computeOthers(ReachingPhysDefs_Others &ReachingDefs, + AArch64FunctionInfo &AArch64FI, const MachineDominatorTree *MDT) { SetOfMachineInstr *InvolvedInLOHs = nullptr; #ifdef DEBUG @@ -792,14 +616,14 @@ // to be changed. SetOfMachineInstr PotentialCandidates; SetOfMachineInstr PotentialADROpportunities; - for (auto &Use : UseToDefs) { + for (auto &Use : ReachingDefs.getUseToDefs()) { // If no definition is available, this is a non candidate. if (Use.second.empty()) continue; // Keep only instructions that are load or store and at the end of // a ADRP -> ADD/LDR/Nothing chain. // We already filtered out the no-chain cases. - if (!isCandidate(Use.first, UseToDefs, MDT)) { + if (!isCandidate(Use.first, ReachingDefs.getUseToDefs(), MDT)) { PotentialADROpportunities.insert(Use.first); continue; } @@ -824,24 +648,23 @@ #ifdef DEBUG SetOfMachineInstr DefsOfPotentialCandidates; #endif - for (const MachineInstr *Candidate : PotentialCandidates) { + for (MachineInstr *Candidate : PotentialCandidates) { // Get the definition of the candidate i.e., ADD or LDR. - const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); + MachineInstr *Def = ReachingDefs.getDef(Candidate); // Record the elements of the chain. - const MachineInstr *L1 = Def; - const MachineInstr *L2 = nullptr; + MachineInstr *L1 = Def; + MachineInstr *L2 = nullptr; unsigned ImmediateDefOpc = Def->getOpcode(); if (Def->getOpcode() != AArch64::ADRP) { // Check the number of users of this node. const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, *Def); + ReachingDefs.getUses(Def->getOperand(0).getReg(), *Def); if (Users->size() > 1) { #ifdef DEBUG // if all the uses of this def are in potential candidate, this is // a complex candidate of level 2. bool IsLevel2 = true; - for (const MachineInstr *MI : *Users) { + for (MachineInstr *MI : *Users) { if (!PotentialCandidates.count(MI)) { ++NumTooCplxLvl2; IsLevel2 = false; @@ -855,15 +678,14 @@ continue; } L2 = Def; - Def = *UseToDefs.find(Def)->second.begin(); + Def = *ReachingDefs.getUseToDefs().find(Def)->second.begin(); L1 = Def; } // else the element in the middle of the chain is nothing, thus // Def already contains the first element of the chain. // Check the number of users of the first node in the chain, i.e., ADRP const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, *Def); + ReachingDefs.getUses(Def->getOperand(0).getReg(), *Def); if (Users->size() > 1) { #ifdef DEBUG // if all the uses of this def are in the defs of the potential candidate, @@ -871,10 +693,10 @@ if (DefsOfPotentialCandidates.empty()) { // lazy init DefsOfPotentialCandidates = PotentialCandidates; - for (const MachineInstr *Candidate : PotentialCandidates) { - if (!UseToDefs.find(Candidate)->second.empty()) + for (MachineInstr *Candidate : PotentialCandidates) { + if (!ReachingDefs.getUseToDefs().find(Candidate)->second.empty()) DefsOfPotentialCandidates.insert( - *UseToDefs.find(Candidate)->second.begin()); + *ReachingDefs.getUseToDefs().find(Candidate)->second.begin()); } } bool Found = false; @@ -986,118 +808,39 @@ } // Now, we grabbed all the big patterns, check ADR opportunities. - for (const MachineInstr *Candidate : PotentialADROpportunities) - registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, - InvolvedInLOHs, RegToId); -} - -/// Look for every register defined by potential LOHs candidates. -/// Map these registers with dense id in @p RegToId and vice-versa in -/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. -static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, - MapIdToReg &IdToReg, - const TargetRegisterInfo *TRI) { - unsigned CurRegId = 0; - if (!PreCollectRegister) { - unsigned NbReg = TRI->getNumRegs(); - for (; CurRegId < NbReg; ++CurRegId) { - RegToId[CurRegId] = CurRegId; - DEBUG(IdToReg.push_back(CurRegId)); - DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); - } - return; - } - - DEBUG(dbgs() << "** Collect Involved Register\n"); - for (const auto &MBB : MF) { - for (const MachineInstr &MI : MBB) { - if (!canDefBePartOfLOH(&MI) && - !isCandidateLoad(&MI) && !isCandidateStore(&MI)) - continue; - - // Process defs - for (MachineInstr::const_mop_iterator IO = MI.operands_begin(), - IOEnd = MI.operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) - if (RegToId.find(*AI) == RegToId.end()) { - DEBUG(IdToReg.push_back(*AI); - assert(IdToReg[CurRegId] == *AI && - "Reg index mismatches insertion index.")); - RegToId[*AI] = CurRegId++; - DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); - } - } - } - } + for (MachineInstr *Candidate : PotentialADROpportunities) + registerADRCandidate(*Candidate, ReachingDefs, AArch64FI, InvolvedInLOHs); } bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineDominatorTree *MDT = &getAnalysis(); - MapRegToId RegToId; - MapIdToReg IdToReg; AArch64FunctionInfo *AArch64FI = MF.getInfo(); assert(AArch64FI && "No MachineFunctionInfo for this function!"); DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); - collectInvolvedReg(MF, RegToId, IdToReg, TRI); - if (RegToId.empty()) - return false; - - MachineInstr *DummyOp = nullptr; - if (BasicBlockScopeOnly) { - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - // For local analysis, create a dummy operation to record uses that are not - // local. - DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); - } - - unsigned NbReg = RegToId.size(); bool Modified = false; // Start with ADRP. - InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // Compute the reaching def in ADRP mode, meaning ADRP definitions - // are first considered as uses. - reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp); - DEBUG(dbgs() << "ADRP reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Translate the definition to uses map into a use to definitions map to ease - // statistic computation. - InstrToInstrs ADRPToReachingDefs; - reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); + ReachingPhysDefs_ADRP ReachingDefs_ADRP(&MF, TRI, BasicBlockScopeOnly); + ReachingDefs_ADRP.addRegs(); + if (!ReachingDefs_ADRP.analyze()) + return false; + DEBUG(ReachingDefs_ADRP.dump();); // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); - delete[] ColorOpToReachedUses; + computeADRP(ReachingDefs_ADRP, *AArch64FI, MDT); // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. - ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // first perform a regular reaching def analysis. - reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp); - DEBUG(dbgs() << "All reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Turn that into a use to defs to ease statistic computation. - InstrToInstrs UsesToReachingDefs; - reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); + ReachingPhysDefs_Others ReachingDefs(&MF, TRI, BasicBlockScopeOnly); + ReachingDefs.addRegs(); + ReachingDefs.analyze(); + DEBUG(ReachingDefs.dump();); // Compute other than AdrpAdrp LOH. - computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, - MDT); - delete[] ColorOpToReachedUses; - - if (BasicBlockScopeOnly) - MF.DeleteMachineInstr(DummyOp); + computeOthers(ReachingDefs, *AArch64FI, MDT); return Modified; } Index: lib/Target/SystemZ/SystemZElimCompare.cpp =================================================================== --- lib/Target/SystemZ/SystemZElimCompare.cpp +++ lib/Target/SystemZ/SystemZElimCompare.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/ReachingPhysDefs.h" #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" @@ -63,20 +64,20 @@ return "SystemZ Comparison Elimination"; } - bool processBlock(MachineBasicBlock &MBB); + bool processBlock(MachineBasicBlock &MBB, + ReachingPhysDefs &ReachingDefs); bool runOnMachineFunction(MachineFunction &F) override; private: Reference getRegReferences(MachineInstr *MI, unsigned Reg); bool convertToBRCT(MachineInstr *MI, MachineInstr *Compare, - SmallVectorImpl &CCUsers); + SetOfMachineInstr &CCUses); + bool convertToLoadAndTest(MachineInstr *MI); bool adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, - SmallVectorImpl &CCUsers); - bool optimizeCompareZero(MachineInstr *Compare, - SmallVectorImpl &CCUsers); - bool fuseCompareAndBranch(MachineInstr *Compare, - SmallVectorImpl &CCUsers); + SetOfMachineInstr &CCUses); + bool optimizeCompareZero(MachineInstr *Compare, SetOfMachineInstr &CCUses); + bool fuseCompareAndBranch(MachineInstr *Compare, SetOfMachineInstr &CCUses); const SystemZInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -89,14 +90,6 @@ return new SystemZElimCompare(TM); } -// Return true if CC is live out of MBB. -static bool isCCLiveOut(MachineBasicBlock &MBB) { - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(SystemZ::CC)) - return true; - return false; -} - // Return true if any CC result of MI would reflect the value of Reg. static bool resultTests(MachineInstr *MI, unsigned Reg) { if (MI->getNumOperands() > 0 && @@ -169,11 +162,11 @@ } // Compare compares the result of MI against zero. If MI is an addition -// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition +// of -1 and if CCUses is a single branch on nonzero, eliminate the addition // and convert the branch to a BRCT(G). Return true on success. bool SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, - SmallVectorImpl &CCUsers) { + SetOfMachineInstr &CCUses) { // Check whether we have an addition of -1. unsigned Opcode = MI->getOpcode(); unsigned BRCT; @@ -187,9 +180,9 @@ return false; // Check whether we have a single JLH. - if (CCUsers.size() != 1) + if (CCUses.size() != 1) return false; - MachineInstr *Branch = CCUsers[0]; + MachineInstr *Branch = CCUses[0]; if (Branch->getOpcode() != SystemZ::BRC || Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP || Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_NE) @@ -231,14 +224,14 @@ return true; } -// The CC users in CCUsers are testing the result of a comparison of some +// The CC users in CCUses are testing the result of a comparison of some // value X against zero and we know that any CC value produced by MI -// would also reflect the value of X. Try to adjust CCUsers so that +// would also reflect the value of X. Try to adjust CCUses so that // they test the result of MI directly, returning true on success. // Leave everything unchanged on failure. bool SystemZElimCompare:: adjustCCMasksForInstr(MachineInstr *MI, MachineInstr *Compare, - SmallVectorImpl &CCUsers) { + SetOfMachineInstr &CCUses) { int Opcode = MI->getOpcode(); const MCInstrDesc &Desc = TII->get(Opcode); unsigned MIFlags = Desc.TSFlags; @@ -259,8 +252,8 @@ // Now check whether these flags are enough for all users. SmallVector AlterMasks; - for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { - MachineInstr *MI = CCUsers[I]; + for (unsigned int I = 0, E = CCUses.size(); I != E; ++I) { + MachineInstr *MI = CCUses[I]; // Fail if this isn't a use of CC that we understand. unsigned Flags = MI->getDesc().TSFlags; @@ -329,11 +322,10 @@ // Try to optimize cases where comparison instruction Compare is testing // a value against zero. Return true on success and if Compare should be -// deleted as dead. CCUsers is the list of instructions that use the CC +// deleted as dead. CCUses is the list of instructions that use the CC // value produced by Compare. bool SystemZElimCompare:: -optimizeCompareZero(MachineInstr *Compare, - SmallVectorImpl &CCUsers) { +optimizeCompareZero(MachineInstr *Compare, SetOfMachineInstr &CCUses) { if (!isCompareZero(Compare)) return false; @@ -350,13 +342,13 @@ // Try to remove both MI and Compare by converting a branch to BRCT(G). // We don't care in this case whether CC is modified between MI and // Compare. - if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) { + if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUses)) { BranchOnCounts += 1; return true; } // Try to eliminate Compare by reusing a CC result from MI. if ((!CCRefs && convertToLoadAndTest(MI)) || - (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUsers))) { + (!CCRefs.Def && adjustCCMasksForInstr(MI, Compare, CCUses))) { EliminatedComparisons += 1; return true; } @@ -374,8 +366,7 @@ // Try to fuse comparison instruction Compare into a later branch. // Return true on success and if Compare is therefore redundant. bool SystemZElimCompare:: -fuseCompareAndBranch(MachineInstr *Compare, - SmallVectorImpl &CCUsers) { +fuseCompareAndBranch(MachineInstr *Compare, SetOfMachineInstr &CCUses) { // See whether we have a comparison that can be fused. unsigned FusedOpcode = TII->getCompareAndBranch(Compare->getOpcode(), Compare); @@ -383,9 +374,9 @@ return false; // See whether we have a single branch with which to fuse. - if (CCUsers.size() != 1) + if (CCUses.size() != 1) return false; - MachineInstr *Branch = CCUsers[0]; + MachineInstr *Branch = CCUses[0]; if (Branch->getOpcode() != SystemZ::BRC) return false; @@ -435,35 +426,29 @@ // Process all comparison instructions in MBB. Return true if something // changed. -bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { +bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB, + ReachingPhysDefs &ReachingDefs) { bool Changed = false; // Walk backwards through the block looking for comparisons, recording // all CC users as we go. The subroutines can delete Compare and // instructions before it. - bool CompleteCCUsers = !isCCLiveOut(MBB); - SmallVector CCUsers; MachineBasicBlock::iterator MBBI = MBB.end(); while (MBBI != MBB.begin()) { MachineInstr *MI = --MBBI; - if (CompleteCCUsers && - (MI->isCompare() || isLoadAndTestAsCmp(MI)) && - (optimizeCompareZero(MI, CCUsers) || - fuseCompareAndBranch(MI, CCUsers))) { - ++MBBI; - MI->eraseFromParent(); - Changed = true; - CCUsers.clear(); - continue; - } - - if (MI->definesRegister(SystemZ::CC)) { - CCUsers.clear(); - CompleteCCUsers = true; + if (MI->isCompare() || isLoadAndTestAsCmp(MI)) { + SetOfMachineInstr *CCUses = ReachingDefs.getUses(SystemZ::CC, *MI); + + if (CCUses != nullptr && + (optimizeCompareZero(MI, *CCUses) || + fuseCompareAndBranch(MI, *CCUses))) { + ++MBBI; + MI->eraseFromParent(); + Changed = true; + } } - if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers) - CCUsers.push_back(MI); } + return Changed; } @@ -471,9 +456,19 @@ TII = static_cast(F.getSubtarget().getInstrInfo()); TRI = &TII->getRegisterInfo(); + // Use the ReachingPhysDefs class to do a global mapping from all + // defs / users of the CC reg. This is helpful in cases where late + // optimizers have caused some CC users to lie in other blocks than + // the compare instruction. + // This is only computed once even though F is being operated on. + ReachingPhysDefs ReachingDefs(&F, TRI, false, "Reaching CC defs"); + ReachingDefs.analyze(SystemZ::CC); + ReachingDefs.pruneMultipleDefs(); + DEBUG(ReachingDefs.dump()); + bool Changed = false; for (auto &MBB : F) - Changed |= processBlock(MBB); + Changed |= processBlock(MBB, ReachingDefs); return Changed; }