Index: llvm/trunk/lib/Target/AArch64/AArch64CollectLOH.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64CollectLOH.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -110,72 +110,34 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "aarch64-collect-loh" -static cl::opt -PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden, - cl::desc("Restrict analysis to registers invovled" - " in LOHs"), - cl::init(true)); - -static cl::opt -BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden, - cl::desc("Restrict analysis at basic block scope"), - cl::init(true)); - STATISTIC(NumADRPSimpleCandidate, "Number of simplifiable ADRP dominate by another"); -#ifndef NDEBUG -STATISTIC(NumADRPComplexCandidate2, - "Number of simplifiable ADRP reachable by 2 defs"); -STATISTIC(NumADRPComplexCandidate3, - "Number of simplifiable ADRP reachable by 3 defs"); -STATISTIC(NumADRPComplexCandidateOther, - "Number of simplifiable ADRP reachable by 4 or more defs"); -STATISTIC(NumADDToSTRWithImm, - "Number of simplifiable STR with imm reachable by ADD"); -STATISTIC(NumLDRToSTRWithImm, - "Number of simplifiable STR with imm reachable by LDR"); STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); -STATISTIC(NumADDToLDRWithImm, - "Number of simplifiable LDR with imm reachable by ADD"); -STATISTIC(NumLDRToLDRWithImm, - "Number of simplifiable LDR with imm reachable by LDR"); STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); -#endif // NDEBUG STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); -#ifndef NDEBUG -STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); -STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); -STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); -STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); -#endif // NDEBUG STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); -STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" namespace { + struct AArch64CollectLOH : public MachineFunctionPass { static char ID; - AArch64CollectLOH() : MachineFunctionPass(ID) { - initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry()); - } + AArch64CollectLOH() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -187,351 +149,57 @@ StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.setPreservesAll(); } - -private: }; -/// A set of MachineInstruction. -typedef SetVector SetOfMachineInstr; -/// Map a basic block to a set of instructions per register. -/// This is used to represent the exposed uses of a basic block -/// per register. -typedef MapVector> -BlockToSetOfInstrsPerColor; -/// Map a basic block to an instruction per register. -/// This is used to represent the live-out definitions of a basic block -/// per register. -typedef MapVector> -BlockToInstrPerColor; -/// Map an instruction to a set of instructions. Used to represent the -/// mapping def to reachable uses or use to definitions. -typedef MapVector InstrToInstrs; -/// Map a basic block to a BitVector. -/// This is used to record the kill registers per basic block. -typedef MapVector BlockToRegSet; - -/// Map a register to a dense id. -typedef DenseMap MapRegToId; -/// Map a dense id to a register. Used for debug purposes. -typedef SmallVector MapIdToReg; -} // end anonymous namespace. - char AArch64CollectLOH::ID = 0; -INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", - AARCH64_COLLECT_LOH_NAME, false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", - AARCH64_COLLECT_LOH_NAME, false, false) - -/// Given a couple (MBB, reg) get the corresponding set of instruction from -/// the given "sets". -/// If this couple does not reference any set, an empty set is added to "sets" -/// for this couple and returned. -/// \param nbRegs is used internally allocate some memory. It must be consistent -/// with the way sets is used. -static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets, - const MachineBasicBlock &MBB, unsigned reg, - unsigned nbRegs) { - SetOfMachineInstr *result; - BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB); - if (it != sets.end()) - result = it->second.get(); - else - result = (sets[&MBB] = make_unique(nbRegs)).get(); - - return result[reg]; -} - -/// Given a couple (reg, MI) get the corresponding set of instructions from the -/// the given "sets". -/// This is used to get the uses record in sets of a definition identified by -/// MI and reg, i.e., MI defines reg. -/// If the couple does not reference anything, an empty set is added to -/// "sets[reg]". -/// \pre set[reg] is valid. -static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg, - const MachineInstr &MI) { - return sets[reg][&MI]; -} - -/// Same as getUses but does not modify the input map: sets. -/// \return NULL if the couple (reg, MI) is not in sets. -static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg, - const MachineInstr &MI) { - InstrToInstrs::const_iterator Res = sets[reg].find(&MI); - if (Res != sets[reg].end()) - return &(Res->second); - return nullptr; -} - -/// Initialize the reaching definition algorithm: -/// For each basic block BB in MF, record: -/// - its kill set. -/// - its reachable uses (uses that are exposed to BB's predecessors). -/// - its the generated definitions. -/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to -/// the list of uses of exposed defintions. -/// \param ADRPMode specifies to only consider ADRP instructions for generated -/// definition. It also consider definitions of ADRP instructions as uses and -/// ignore other uses. The ADRPMode is used to collect the information for LHO -/// that involve ADRP operation only. -static void initReachingDef(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - const MapRegToId &RegToId, - const MachineInstr *DummyOp, bool ADRPMode) { - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - unsigned NbReg = RegToId.size(); - - for (const MachineBasicBlock &MBB : MF) { - auto &BBGen = Gen[&MBB]; - BBGen = make_unique(NbReg); - std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr); - - BitVector &BBKillSet = Kill[&MBB]; - BBKillSet.resize(NbReg); - for (const MachineInstr &MI : MBB) { - bool IsADRP = MI.getOpcode() == AArch64::ADRP; - - // Process uses first. - if (IsADRP || !ADRPMode) - for (const MachineOperand &MO : MI.operands()) { - // Treat ADRP def as use, as the goal of the analysis is to find - // ADRP defs reached by other ADRP defs. - if (!MO.isReg() || (!ADRPMode && !MO.isUse()) || - (ADRPMode && (!IsADRP || !MO.isDef()))) - continue; - unsigned CurReg = MO.getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - CurReg = ItCurRegId->second; - - // if CurReg has not been defined, this use is reachable. - if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) - getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI); - // current basic block definition for this color, if any, is in Gen. - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); - } - - // Process clobbers. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isRegMask()) - continue; - // Clobbers kill the related colors. - const uint32_t *PreservedRegs = MO.getRegMask(); - - // Set generated regs. - for (const auto &Entry : RegToId) { - unsigned Reg = Entry.second; - // Use the global register ID when querying APIs external to this - // pass. - if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { - // Do not register clobbered definition for no ADRP. - // This definition is not used anyway (otherwise register - // allocation is wrong). - BBGen[Reg] = ADRPMode ? &MI : nullptr; - BBKillSet.set(Reg); - } - } - } - - // Process register defs. - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) - continue; - unsigned CurReg = MO.getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { - MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - // If this alias has not been recorded, then it is not interesting - // for the current analysis. - // We can end up in this situation because of tuple registers. - // E.g., Let say we are interested in S1. When we register - // S1, we will also register its aliases and in particular - // the tuple Q1_Q2. - // Now, when we encounter Q1_Q2, we will look through its aliases - // and will find that S2 is not registered. - if (ItRegId == RegToId.end()) - continue; - - BBKillSet.set(ItRegId->second); - BBGen[ItRegId->second] = &MI; - } - BBGen[ItCurRegId->second] = &MI; - } - } - - // If we restrict our analysis to basic block scope, conservatively add a - // dummy - // use for each generated value. - if (!ADRPMode && DummyOp && !MBB.succ_empty()) - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp); - } -} - -/// Reaching def core algorithm: -/// while an Out has changed -/// for each bb -/// for each color -/// In[bb][color] = U Out[bb.predecessors][color] -/// insert reachableUses[bb][color] in each in[bb][color] -/// op.reachedUses -/// -/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - unsigned NbReg) { - bool HasChanged; - do { - HasChanged = false; - for (const MachineBasicBlock &MBB : MF) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); - SetOfMachineInstr &BBReachableUses = - getSet(ReachableUses, MBB, CurReg, NbReg); - SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); - unsigned Size = BBOutSet.size(); - // In[bb][color] = U Out[bb.predecessors][color] - for (const MachineBasicBlock *PredMBB : MBB.predecessors()) { - SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); - BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); - } - // insert reachableUses[bb][color] in each in[bb][color] op.reachedses - for (const MachineInstr *MI : BBInSet) { - SetOfMachineInstr &OpReachedUses = - getUses(ColorOpToReachedUses, CurReg, *MI); - OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); - } - // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) - if (!Kill[&MBB].test(CurReg)) - BBOutSet.insert(BBInSet.begin(), BBInSet.end()); - if (Gen[&MBB][CurReg]) - BBOutSet.insert(Gen[&MBB][CurReg]); - HasChanged |= BBOutSet.size() != Size; - } - } - } while (HasChanged); -} - -/// Reaching definition algorithm. -/// \param MF function on which the algorithm will operate. -/// \param[out] ColorOpToReachedUses will contain the result of the reaching -/// def algorithm. -/// \param ADRPMode specify whether the reaching def algorithm should be tuned -/// for ADRP optimization. \see initReachingDef for more details. -/// \param DummyOp if not NULL, the algorithm will work at -/// basic block scope and will set for every exposed definition a use to -/// @p DummyOp. -/// \pre ColorOpToReachedUses is an array of at least number of registers of -/// InstrToInstrs. -static void reachingDef(const MachineFunction &MF, - InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, bool ADRPMode = false, - const MachineInstr *DummyOp = nullptr) { - // structures: - // For each basic block. - // Out: a set per color of definitions that reach the - // out boundary of this block. - // In: Same as Out but for in boundary. - // Gen: generated color in this block (one operation per color). - // Kill: register set of killed color in this block. - // ReachableUses: a set per color of uses (operation) reachable - // for "In" definitions. - BlockToSetOfInstrsPerColor Out, In, ReachableUses; - BlockToInstrPerColor Gen; - BlockToRegSet Kill; - - // Initialize Gen, kill and reachableUses. - initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, - DummyOp, ADRPMode); - - // Algo. - if (!DummyOp) - reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, - ReachableUses, RegToId.size()); -} +} // end anonymous namespace. -#ifndef NDEBUG -/// print the result of the reaching definition algorithm. -static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, - unsigned NbReg, const TargetRegisterInfo *TRI, - const MapIdToReg &IdToReg) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - if (ColorOpToReachedUses[CurReg].empty()) - continue; - DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); +INITIALIZE_PASS(AArch64CollectLOH, "aarch64-collect-loh", + AARCH64_COLLECT_LOH_NAME, false, false) - for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { - DEBUG(dbgs() << "Def:\n"); - DEBUG(DefsIt.first->print(dbgs())); - DEBUG(dbgs() << "Reachable uses:\n"); - for (const MachineInstr *MI : DefsIt.second) { - DEBUG(MI->print(dbgs())); - } - } +static bool canAddBePartOfLOH(const MachineInstr &MI) { + // Check immediate to see if the immediate is an address. + switch (MI.getOperand(2).getType()) { + default: + return false; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_BlockAddress: + return true; } } -#endif // NDEBUG /// Answer the following question: Can Def be one of the definition /// involved in a part of a LOH? -static bool canDefBePartOfLOH(const MachineInstr *Def) { - unsigned Opc = Def->getOpcode(); +static bool canDefBePartOfLOH(const MachineInstr &MI) { // Accept ADRP, ADDLow and LOADGot. - switch (Opc) { + switch (MI.getOpcode()) { default: return false; case AArch64::ADRP: return true; case AArch64::ADDXri: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - return true; - } + return canAddBePartOfLOH(MI); case AArch64::LDRXui: // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { + switch (MI.getOperand(2).getType()) { default: return false; case MachineOperand::MO_GlobalAddress: - return true; + return MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT; } } - // Unreachable. - return false; } /// Check whether the given instruction can the end of a LOH chain involving a /// store. -static bool isCandidateStore(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { +static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) { + switch (MI.getOpcode()) { default: return false; case AArch64::STRBBui: @@ -543,109 +211,19 @@ case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: + // We can only optimize the index operand. // In case we have str xA, [xA, #imm], this is two different uses // of xA and we cannot fold, otherwise the xA stored may be wrong, // even if #imm == 0. - if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg()) - return true; - } - return false; -} - -/// Given the result of a reaching definition algorithm in ColorOpToReachedUses, -/// Build the Use to Defs information and filter out obvious non-LOH candidates. -/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions. -/// In non-ADRPMode, non-LOH candidates are "uses" with several definition, -/// i.e., no simple chain. -/// \param ADRPMode -- \see initReachingDef. -static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs, - const InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, - bool ADRPMode = false) { - - SetOfMachineInstr NotCandidate; - unsigned NbReg = RegToId.size(); - MapRegToId::const_iterator EndIt = RegToId.end(); - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) { - // If this color is never defined, continue. - if (ColorOpToReachedUses[CurReg].empty()) - continue; - - for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) { - for (const MachineInstr *MI : DefsIt.second) { - const MachineInstr *Def = DefsIt.first; - MapRegToId::const_iterator It; - // if all the reaching defs are not adrp, this use will not be - // simplifiable. - if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) || - (!ADRPMode && !canDefBePartOfLOH(Def)) || - (!ADRPMode && isCandidateStore(MI) && - // store are LOH candidate iff the end of the chain is used as - // base. - ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt || - It->second != CurReg))) { - NotCandidate.insert(MI); - continue; - } - // Do not consider self reaching as a simplifiable case for ADRP. - if (!ADRPMode || MI != DefsIt.first) { - UseToReachingDefs[MI].insert(DefsIt.first); - // If UsesIt has several reaching definitions, it is not - // candidate for simplificaton in non-ADRPMode. - if (!ADRPMode && UseToReachingDefs[MI].size() > 1) - NotCandidate.insert(MI); - } - } - } - } - for (const MachineInstr *Elem : NotCandidate) { - DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n"); - // It would have been better if we could just remove the entry - // from the map. Because of that, we have to filter the garbage - // (second.empty) in the subsequence analysis. - UseToReachingDefs[Elem].clear(); - } -} - -/// Based on the use to defs information (in ADRPMode), compute the -/// opportunities of LOH ADRP-related. -static void computeADRP(const InstrToInstrs &UseToDefs, - AArch64FunctionInfo &AArch64FI, - const MachineDominatorTree *MDT) { - DEBUG(dbgs() << "*** Compute LOH for ADRP\n"); - for (const auto &Entry : UseToDefs) { - unsigned Size = Entry.second.size(); - if (Size == 0) - continue; - if (Size == 1) { - const MachineInstr *L2 = *Entry.second.begin(); - const MachineInstr *L1 = Entry.first; - if (!MDT->dominates(L2, L1)) { - DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1 - << '\n'); - continue; - } - DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n'); - AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1}); - ++NumADRPSimpleCandidate; - } -#ifndef NDEBUG - else if (Size == 2) - ++NumADRPComplexCandidate2; - else if (Size == 3) - ++NumADRPComplexCandidate3; - else - ++NumADRPComplexCandidateOther; -#endif - // if Size < 1, the use should have been removed from the candidates - assert(Size >= 1 && "No reaching defs for that use!"); + return MI.getOperandNo(&MO) == 1 && + MI.getOperand(0).getReg() != MI.getOperand(1).getReg(); } } /// Check whether the given instruction can be the end of a LOH chain /// involving a load. -static bool isCandidateLoad(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { +static bool isCandidateLoad(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; case AArch64::LDRSBWui: @@ -660,17 +238,13 @@ case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: - if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT) - return false; - return true; + return !(MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT); } - // Unreachable. - return false; } /// Check whether the given instruction can load a litteral. -static bool supportLoadFromLiteral(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { +static bool supportLoadFromLiteral(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; case AArch64::LDRSWui: @@ -681,353 +255,232 @@ case AArch64::LDRQui: return true; } - // Unreachable. - return false; } -/// Check whether the given instruction is a LOH candidate. -/// \param UseToDefs is used to check that Instr is at the end of LOH supported -/// chain. -/// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are -/// already been filtered out. -static bool isCandidate(const MachineInstr *Instr, - const InstrToInstrs &UseToDefs, - const MachineDominatorTree *MDT) { - if (!isCandidateLoad(Instr) && !isCandidateStore(Instr)) - return false; +/// Number of GPR registers traked by mapRegToGPRIndex() +static const unsigned N_GPR_REGS = 31; +/// Map register number to index from 0-30. +static int mapRegToGPRIndex(MCPhysReg Reg) { + static_assert(AArch64::X28 - AArch64::X0 + 3 == N_GPR_REGS, "Number of GPRs"); + static_assert(AArch64::W30 - AArch64::W0 + 1 == N_GPR_REGS, "Number of GPRs"); + if (AArch64::X0 <= Reg && Reg <= AArch64::X28) + return Reg - AArch64::X0; + if (AArch64::W0 <= Reg && Reg <= AArch64::W30) + return Reg - AArch64::W0; + // TableGen gives "FP" and "LR" an index not adjacent to X28 so we have to + // handle them as special cases. + if (Reg == AArch64::FP) + return 29; + if (Reg == AArch64::LR) + return 30; + return -1; +} + +/// State tracked per register. +/// The main algorithm walks backwards over a basic block maintaining this +/// datastructure for each tracked general purpose register. +struct LOHInfo { + MCLOHType Type : 8; ///< "Best" type of LOH possible. + bool IsCandidate : 1; ///< Possible LOH candidate. + bool OneUser : 1; ///< Found exactly one user (yet). + bool MultiUsers : 1; ///< Found multiple users. + const MachineInstr *MI0; ///< First instruction involved in the LOH. + const MachineInstr *MI1; ///< Second instruction involved in the LOH + /// (if any). + const MachineInstr *LastADRP; ///< Last ADRP in same register. +}; - const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin(); - if (Def->getOpcode() != AArch64::ADRP) { - // At this point, Def is ADDXri or LDRXui of the right type of - // symbol, because we filtered out the uses that were not defined - // by these kind of instructions (+ ADRP). - - // Check if this forms a simple chain: each intermediate node must - // dominates the next one. - if (!MDT->dominates(Def, Instr)) - return false; - // Move one node up in the simple chain. - if (UseToDefs.find(Def) == - UseToDefs.end() - // The map may contain garbage we have to ignore. - || - UseToDefs.find(Def)->second.empty()) - return false; - Instr = Def; - Def = *UseToDefs.find(Def)->second.begin(); +/// Update state \p Info given \p MI uses the tracked register. +static void handleUse(const MachineInstr &MI, const MachineOperand &MO, + LOHInfo &Info) { + // We have multiple uses if we already found one before. + if (Info.MultiUsers || Info.OneUser) { + Info.IsCandidate = false; + Info.MultiUsers = true; + return; } - // Check if we reached the top of the simple chain: - // - top is ADRP. - // - check the simple chain property: each intermediate node must - // dominates the next one. - if (Def->getOpcode() == AArch64::ADRP) - return MDT->dominates(Def, Instr); - return false; -} + Info.OneUser = true; -static bool registerADRCandidate(const MachineInstr &Use, - const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - AArch64FunctionInfo &AArch64FI, - SetOfMachineInstr *InvolvedInLOHs, - const MapRegToId &RegToId) { - // Look for opportunities to turn ADRP -> ADD or - // ADRP -> LDR GOTPAGEOFF into ADR. - // If ADRP has more than one use. Give up. - if (Use.getOpcode() != AArch64::ADDXri && - (Use.getOpcode() != AArch64::LDRXui || - !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT))) - return false; - InstrToInstrs::const_iterator It = UseToDefs.find(&Use); - // The map may contain garbage that we need to ignore. - if (It == UseToDefs.end() || It->second.empty()) - return false; - const MachineInstr &Def = **It->second.begin(); - if (Def.getOpcode() != AArch64::ADRP) - return false; - // Check the number of users of ADRP. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def.getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { - ++NumADRComplexCandidate; - return false; + // Start new LOHInfo if applicable. + if (isCandidateLoad(MI)) { + Info.Type = MCLOH_AdrpLdr; + Info.IsCandidate = true; + Info.MI0 = &MI; + // Note that even this is AdrpLdr now, we can switch to a Ldr variant + // later. + } else if (isCandidateStore(MI, MO)) { + Info.Type = MCLOH_AdrpAddStr; + Info.IsCandidate = true; + Info.MI0 = &MI; + Info.MI1 = nullptr; + } else if (MI.getOpcode() == AArch64::ADDXri) { + Info.Type = MCLOH_AdrpAdd; + Info.IsCandidate = true; + Info.MI0 = &MI; + } else if (MI.getOpcode() == AArch64::LDRXui && + MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { + Info.Type = MCLOH_AdrpLdrGot; + Info.IsCandidate = true; + Info.MI0 = &MI; } - ++NumADRSimpleCandidate; - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) && - "ADRP already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) && - "ADD already involved in LOH."); - DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n'); - - AArch64FI.addLOHDirective( - Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot, - {&Def, &Use}); - return true; } -/// Based on the use to defs information (in non-ADRPMode), compute the -/// opportunities of LOH non-ADRP-related -static void computeOthers(const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId, - const MachineDominatorTree *MDT) { - SetOfMachineInstr *InvolvedInLOHs = nullptr; -#ifndef NDEBUG - SetOfMachineInstr InvolvedInLOHsStorage; - InvolvedInLOHs = &InvolvedInLOHsStorage; -#endif // NDEBUG - DEBUG(dbgs() << "*** Compute LOH for Others\n"); - // ADRP -> ADD/LDR -> LDR/STR pattern. - // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. - - // FIXME: When the statistics are not important, - // This initial filtering loop can be merged into the next loop. - // Currently, we didn't do it to have the same code for both DEBUG and - // NDEBUG builds. Indeed, the iterator of the second loop would need - // to be changed. - SetOfMachineInstr PotentialCandidates; - SetOfMachineInstr PotentialADROpportunities; - for (auto &Use : UseToDefs) { - // If no definition is available, this is a non candidate. - if (Use.second.empty()) - continue; - // Keep only instructions that are load or store and at the end of - // a ADRP -> ADD/LDR/Nothing chain. - // We already filtered out the no-chain cases. - if (!isCandidate(Use.first, UseToDefs, MDT)) { - PotentialADROpportunities.insert(Use.first); - continue; +/// Update state \p Info given the tracked register is clobbered. +static void handleClobber(LOHInfo &Info) { + Info.IsCandidate = false; + Info.OneUser = false; + Info.MultiUsers = false; + Info.LastADRP = nullptr; +} + +/// Update state \p Info given that \p MI is possibly the middle instruction +/// of an LOH involving 3 instructions. +static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, + LOHInfo &OpInfo) { + if (!DefInfo.IsCandidate || (&DefInfo != &OpInfo && OpInfo.OneUser)) + return false; + // Copy LOHInfo for dest register to LOHInfo for source register. + if (&DefInfo != &OpInfo) { + OpInfo = DefInfo; + // Invalidate \p DefInfo because we track it in \p OpInfo now. + handleClobber(DefInfo); + } + + // Advance state machine. + assert(OpInfo.IsCandidate && "Expect valid state"); + if (MI.getOpcode() == AArch64::ADDXri && canAddBePartOfLOH(MI)) { + if (OpInfo.Type == MCLOH_AdrpLdr) { + OpInfo.Type = MCLOH_AdrpAddLdr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; + } else if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { + OpInfo.Type = MCLOH_AdrpAddStr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; } - PotentialCandidates.insert(Use.first); - } - - // Make the following distinctions for statistics as the linker does - // know how to decode instructions: - // - ADD/LDR/Nothing make there different patterns. - // - LDR/STR make two different patterns. - // Hence, 6 - 1 base patterns. - // (because ADRP-> Nothing -> STR is not simplifiable) - - // The linker is only able to have a simple semantic, i.e., if pattern A - // do B. - // However, we want to see the opportunity we may miss if we were able to - // catch more complex cases. - - // PotentialCandidates are result of a chain ADRP -> ADD/LDR -> - // A potential candidate becomes a candidate, if its current immediate - // operand is zero and all nodes of the chain have respectively only one user -#ifndef NDEBUG - SetOfMachineInstr DefsOfPotentialCandidates; -#endif - for (const MachineInstr *Candidate : PotentialCandidates) { - // Get the definition of the candidate i.e., ADD or LDR. - const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin(); - // Record the elements of the chain. - const MachineInstr *L1 = Def; - const MachineInstr *L2 = nullptr; - unsigned ImmediateDefOpc = Def->getOpcode(); - if (Def->getOpcode() != AArch64::ADRP) { - // Check the number of users of this node. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, *Def); - if (Users->size() > 1) { -#ifndef NDEBUG - // if all the uses of this def are in potential candidate, this is - // a complex candidate of level 2. - bool IsLevel2 = true; - for (const MachineInstr *MI : *Users) { - if (!PotentialCandidates.count(MI)) { - ++NumTooCplxLvl2; - IsLevel2 = false; - break; - } - } - if (IsLevel2) - ++NumCplxLvl2; -#endif // NDEBUG - PotentialADROpportunities.insert(Def); - continue; - } - L2 = Def; - Def = *UseToDefs.find(Def)->second.begin(); - L1 = Def; - } // else the element in the middle of the chain is nothing, thus - // Def already contains the first element of the chain. - - // Check the number of users of the first node in the chain, i.e., ADRP - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, *Def); - if (Users->size() > 1) { -#ifndef NDEBUG - // if all the uses of this def are in the defs of the potential candidate, - // this is a complex candidate of level 1 - if (DefsOfPotentialCandidates.empty()) { - // lazy init - DefsOfPotentialCandidates = PotentialCandidates; - for (const MachineInstr *Candidate : PotentialCandidates) { - if (!UseToDefs.find(Candidate)->second.empty()) - DefsOfPotentialCandidates.insert( - *UseToDefs.find(Candidate)->second.begin()); - } - } - bool Found = false; - for (auto &Use : *Users) { - if (!DefsOfPotentialCandidates.count(Use)) { - ++NumTooCplxLvl1; - Found = true; - break; - } - } - if (!Found) - ++NumCplxLvl1; -#endif // NDEBUG - continue; + } else { + assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); + assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && + "Expected GOT relocation"); + if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { + OpInfo.Type = MCLOH_AdrpLdrGotStr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; + } else if (OpInfo.Type == MCLOH_AdrpLdr) { + OpInfo.Type = MCLOH_AdrpLdrGotLdr; + OpInfo.IsCandidate = true; + OpInfo.MI1 = &MI; + return true; } + } + return false; +} - bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); - // If the chain is three instructions long and ldr is the second element, - // then this ldr must load form GOT, otherwise this is not a correct chain. - if (L2 && !IsL2Add && - !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)) - continue; - SmallVector Args; - MCLOHType Kind; - if (isCandidateLoad(Candidate)) { - if (!L2) { - // At this point, the candidate LOH indicates that the ldr instruction - // may use a direct access to the symbol. There is not such encoding - // for loads of byte and half. - if (!supportLoadFromLiteral(Candidate)) - continue; - - DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate - << '\n'); - Kind = MCLOH_AdrpLdr; - Args.push_back(L1); - Args.push_back(Candidate); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); +/// Update state when seeing and ADRP instruction. +static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI, + LOHInfo &Info) { + if (Info.LastADRP != nullptr) { + DEBUG(dbgs() << "Adding MCLOH_AdrpAdrp:\n" << '\t' << MI << '\t' + << *Info.LastADRP); + AFI.addLOHDirective(MCLOH_AdrpAdrp, {&MI, Info.LastADRP}); + ++NumADRPSimpleCandidate; + } + + // Produce LOH directive if possible. + if (Info.IsCandidate) { + switch (Info.Type) { + case MCLOH_AdrpAdd: + DEBUG(dbgs() << "Adding MCLOH_AdrpAdd:\n" << '\t' << MI << '\t' + << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpAdd, {&MI, Info.MI0}); + ++NumADRSimpleCandidate; + break; + case MCLOH_AdrpLdr: + if (supportLoadFromLiteral(*Info.MI0)) { + DEBUG(dbgs() << "Adding MCLOH_AdrpLdr:\n" << '\t' << MI << '\t' + << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdr, {&MI, Info.MI0}); ++NumADRPToLDR; - } else { - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifndef NDEBUG - // get the immediate of the load - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToLDR; - else - ++NumLDRToLDR; - else if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToLDRWithImm; - else - ++NumLDRToLDRWithImm; -#endif // NDEBUG - } - } else { - if (ImmediateDefOpc == AArch64::ADRP) - continue; - else { - - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifndef NDEBUG - // get the immediate of the store - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToSTR; - else - ++NumLDRToSTR; - else if (ImmediateDefOpc == AArch64::ADDXri) - ++NumADDToSTRWithImm; - else - ++NumLDRToSTRWithImm; -#endif // DEBUG } + break; + case MCLOH_AdrpAddLdr: + DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0}); + ++NumADDToLDR; + break; + case MCLOH_AdrpAddStr: + if (Info.MI1 != nullptr) { + DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpAddStr, {&MI, Info.MI1, Info.MI0}); + ++NumADDToSTR; + } + break; + case MCLOH_AdrpLdrGotLdr: + DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotLdr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdrGotLdr, {&MI, Info.MI1, Info.MI0}); + ++NumLDRToLDR; + break; + case MCLOH_AdrpLdrGotStr: + DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGotStr:\n" << '\t' << MI << '\t' + << *Info.MI1 << '\t' << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdrGotStr, {&MI, Info.MI1, Info.MI0}); + ++NumLDRToSTR; + break; + case MCLOH_AdrpLdrGot: + DEBUG(dbgs() << "Adding MCLOH_AdrpLdrGot:\n" << '\t' << MI << '\t' + << *Info.MI0); + AFI.addLOHDirective(MCLOH_AdrpLdrGot, {&MI, Info.MI0}); + break; + case MCLOH_AdrpAdrp: + llvm_unreachable("MCLOH_AdrpAdrp not used in state machine"); } - AArch64FI.addLOHDirective(Kind, Args); } - // Now, we grabbed all the big patterns, check ADR opportunities. - for (const MachineInstr *Candidate : PotentialADROpportunities) - registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI, - InvolvedInLOHs, RegToId); + handleClobber(Info); + Info.LastADRP = &MI; +} + +static void handleRegMaskClobber(const uint32_t *RegMask, MCPhysReg Reg, + LOHInfo *LOHInfos) { + if (!MachineOperand::clobbersPhysReg(RegMask, Reg)) + return; + int Idx = mapRegToGPRIndex(Reg); + if (Idx >= 0) + handleClobber(LOHInfos[Idx]); } -/// Look for every register defined by potential LOHs candidates. -/// Map these registers with dense id in @p RegToId and vice-versa in -/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. -static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, - MapIdToReg &IdToReg, - const TargetRegisterInfo *TRI) { - unsigned CurRegId = 0; - if (!PreCollectRegister) { - unsigned NbReg = TRI->getNumRegs(); - for (; CurRegId < NbReg; ++CurRegId) { - RegToId[CurRegId] = CurRegId; - DEBUG(IdToReg.push_back(CurRegId)); - DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); +static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { + // Handle defs and regmasks. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + const uint32_t *RegMask = MO.getRegMask(); + for (MCPhysReg Reg : AArch64::GPR32RegClass) + handleRegMaskClobber(RegMask, Reg, LOHInfos); + for (MCPhysReg Reg : AArch64::GPR64RegClass) + handleRegMaskClobber(RegMask, Reg, LOHInfos); + continue; } - return; + if (!MO.isReg() || !MO.isDef()) + continue; + int Idx = mapRegToGPRIndex(MO.getReg()); + if (Idx < 0) + continue; + handleClobber(LOHInfos[Idx]); } - - DEBUG(dbgs() << "** Collect Involved Register\n"); - for (const auto &MBB : MF) { - for (const MachineInstr &MI : MBB) { - if (!canDefBePartOfLOH(&MI) && - !isCandidateLoad(&MI) && !isCandidateStore(&MI)) - continue; - - // Process defs - for (MachineInstr::const_mop_iterator IO = MI.operands_begin(), - IOEnd = MI.operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) - if (RegToId.find(*AI) == RegToId.end()) { - DEBUG(IdToReg.push_back(*AI); - assert(IdToReg[CurRegId] == *AI && - "Reg index mismatches insertion index.")); - RegToId[*AI] = CurRegId++; - DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); - } - } - } + // Handle uses. + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + int Idx = mapRegToGPRIndex(MO.getReg()); + if (Idx < 0) + continue; + handleUse(MI, MO, LOHInfos[Idx]); } } @@ -1035,74 +488,59 @@ if (skipFunction(*MF.getFunction())) return false; - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const MachineDominatorTree *MDT = &getAnalysis(); - - MapRegToId RegToId; - MapIdToReg IdToReg; - AArch64FunctionInfo *AArch64FI = MF.getInfo(); - assert(AArch64FI && "No MachineFunctionInfo for this function!"); - - DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n'); + DEBUG(dbgs() << "********** AArch64 Collect LOH **********\n" + << "Looking in function " << MF.getName() << '\n'); - collectInvolvedReg(MF, RegToId, IdToReg, TRI); - if (RegToId.empty()) - return false; + LOHInfo LOHInfos[N_GPR_REGS]; + AArch64FunctionInfo &AFI = *MF.getInfo(); + for (const MachineBasicBlock &MBB : MF) { + // Reset register tracking state. + memset(LOHInfos, 0, sizeof(LOHInfos)); + // Live-out registers are used. + for (const MachineBasicBlock *Succ : MBB.successors()) { + for (const auto &LI : Succ->liveins()) { + int RegIdx = mapRegToGPRIndex(LI.PhysReg); + if (RegIdx >= 0) + LOHInfos[RegIdx].OneUser = true; + } + } - MachineInstr *DummyOp = nullptr; - if (BasicBlockScopeOnly) { - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - // For local analysis, create a dummy operation to record uses that are not - // local. - DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc()); + // Walk the basic block backwards and update the per register state machine + // in the process. + for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + case AArch64::ADDXri: + case AArch64::LDRXui: + if (canDefBePartOfLOH(MI)) { + const MachineOperand &Def = MI.getOperand(0); + const MachineOperand &Op = MI.getOperand(1); + assert(Def.isReg() && Def.isDef() && "Expected reg def"); + assert(Op.isReg() && Op.isUse() && "Expected reg use"); + int DefIdx = mapRegToGPRIndex(Def.getReg()); + int OpIdx = mapRegToGPRIndex(Op.getReg()); + if (DefIdx >= 0 && OpIdx >= 0 && + handleMiddleInst(MI, LOHInfos[DefIdx], LOHInfos[OpIdx])) + continue; + } + break; + case AArch64::ADRP: + const MachineOperand &Op0 = MI.getOperand(0); + int Idx = mapRegToGPRIndex(Op0.getReg()); + if (Idx >= 0) { + handleADRP(MI, AFI, LOHInfos[Idx]); + continue; + } + break; + } + handleNormalInst(MI, LOHInfos); + } } - unsigned NbReg = RegToId.size(); - bool Modified = false; - - // Start with ADRP. - InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // Compute the reaching def in ADRP mode, meaning ADRP definitions - // are first considered as uses. - reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp); - DEBUG(dbgs() << "ADRP reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Translate the definition to uses map into a use to definitions map to ease - // statistic computation. - InstrToInstrs ADRPToReachingDefs; - reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); - - // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *AArch64FI, MDT); - delete[] ColorOpToReachedUses; - - // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. - ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // first perform a regular reaching def analysis. - reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp); - DEBUG(dbgs() << "All reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Turn that into a use to defs to ease statistic computation. - InstrToInstrs UsesToReachingDefs; - reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); - - // Compute other than AdrpAdrp LOH. - computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId, - MDT); - delete[] ColorOpToReachedUses; - - if (BasicBlockScopeOnly) - MF.DeleteMachineInstr(DummyOp); - - return Modified; + // Return "no change": The pass only collects information. + return false; } -/// createAArch64CollectLOHPass - returns an instance of the Statistic for -/// linker optimization pass. FunctionPass *llvm::createAArch64CollectLOHPass() { return new AArch64CollectLOH(); } Index: llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=true -aarch64-collect-loh-pre-collect-register=false < %s -o - | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O3 -aarch64-enable-collect-loh | FileCheck %s ; Check that the LOH analysis does not crash when the analysed chained ; contains instructions that are filtered out. ; Index: llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh-str.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh-str.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh-str.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s ; Test case for . ; AdrpAddStr cannot be used when the store uses same ; register as address and value. Indeed, the related Index: llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh.ll +++ llvm/trunk/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s -; RUN: llc -mtriple=arm64-linux-gnu -O2 -aarch64-enable-collect-loh -aarch64-collect-loh-bb-only=false < %s -o - | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llc -o - %s -mtriple=arm64-apple-ios -O2 | FileCheck %s +; RUN: llc -o - %s -mtriple=arm64-linux-gnu -O2 | FileCheck %s --check-prefix=CHECK-ELF ; CHECK-ELF-NOT: .loh ; CHECK-ELF-NOT: AdrpAdrp @@ -633,11 +633,14 @@ ; a tuple register to appear in the lowering. Thus, the target ; cpu is required to have the problem reproduced. ; CHECK-LABEL: _uninterestingSub +; CHECK: [[LOH_LABEL0:Lloh[0-9]+]]: ; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE -; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] +; CHECK: [[LOH_LABEL1:Lloh[0-9]+]]: +; CHECK: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] ; The tuple comes from the next instruction. ; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]] ; CHECK: ret +; CHECK: .loh AdrpLdr [[LOH_LABEL0]], [[LOH_LABEL1]] define void @uninterestingSub(i8* nocapture %row) #0 { %tmp = bitcast i8* %row to <16 x i8>* %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16 @@ -664,10 +667,10 @@ if.then.i: ret void if.end.i: -; CHECK: .loh AdrpAdrp Lloh91, Lloh93 -; CHECK: .loh AdrpLdr Lloh91, Lloh92 -; CHECK: .loh AdrpLdrGot Lloh93, Lloh95 -; CHECK: .loh AdrpLdrGot Lloh94, Lloh96 +; CHECK: .loh AdrpLdrGot +; CHECK: .loh AdrpLdrGot +; CHECK: .loh AdrpAdrp +; CHECK: .loh AdrpLdr %mul.i.i.i = fmul double undef, 1.000000e-06 %add.i.i.i = fadd double undef, %mul.i.i.i %sub.i.i = fsub double %add.i.i.i, undef Index: llvm/trunk/test/CodeGen/AArch64/loh.mir =================================================================== --- llvm/trunk/test/CodeGen/AArch64/loh.mir +++ llvm/trunk/test/CodeGen/AArch64/loh.mir @@ -0,0 +1,179 @@ +# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s +--- | + define void @func0() { ret void } + + declare void @extfunc() + + @g0 = external global i32 + @g1 = external global i32 + @g2 = external global i32 + @g3 = external global i32 + @g4 = external global i32 + @g5 = external global i32 +... +--- +# Check various LOH variants. Remember that the algorithms walks the basic +# blocks backwards. +# CHECK-LABEL: ********** AArch64 Collect LOH ********** +# CHECK-LABEL: Looking in function func0 +name: func0 +body: | + bb.0: + ; CHECK: Adding MCLOH_AdrpAdrp: + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: Adding MCLOH_AdrpAdrp: + ; CHECK-NEXT: %X0 = ADRP + ; CHECK-NEXT: %X0 = ADRP + %x0 = ADRP target-flags(aarch64-page) @g0 + %x0 = ADRP target-flags(aarch64-page) @g1 + %x1 = ADRP target-flags(aarch64-page) @g2 + %x1 = ADRP target-flags(aarch64-page) @g3 + %x1 = ADRP target-flags(aarch64-page) @g4 + + bb.1: + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X20 = ADRP + ; CHECK-NEXT: %X3 = ADDXri %X20, + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %X1 = ADDXri %X1, + %x1 = ADRP target-flags(aarch64-page) @g0 + %x9 = SUBXri %x11, 5, 0 ; should not affect MCLOH formation + %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g0, 0 + %x20 = ADRP target-flags(aarch64-page) @g0 + BL @extfunc, csr_aarch64_aapcs ; should not clobber X20 + %x3 = ADDXri %x20, target-flags(aarch64-pageoff) @g0, 0 + + bb.2: + ; CHECK-NOT: MCLOH_AdrpAdd + %x9 = ADRP target-flags(aarch64-page) @g0 + BL @extfunc, csr_aarch64_aapcs ; clobbers x9 + %x9 = ADDXri %x9, target-flags(aarch64-pageoff) @g0, 0 + + bb.3: + ; CHECK-NOT: MCLOH_AdrpAdd + %x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def dead %x10 ; clobbers x10 + %x10 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + + bb.4: + ; Cannot produce a LOH for multiple users + ; CHECK-NOT: MCLOH_AdrpAdd + %x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def dead %x10 ; clobbers x10 + %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + + bb.5: + ; CHECK-NEXT: Adding MCLOH_AdrpLdr: + ; CHECK-NEXT: %X5 = ADRP + ; CHECK-NEXT: %S6 = LDRSui %X5, + ; CHECK-NEXT: Adding MCLOH_AdrpLdr: + ; CHECK-NEXT: %X4 = ADRP + ; CHECK-NEXT: %X4 = LDRXui %X4, + %x4 = ADRP target-flags(aarch64-page) @g2 + %x4 = LDRXui %x4, target-flags(aarch64-pageoff) @g2 + %x5 = ADRP target-flags(aarch64-page) @g2 + %s6 = LDRSui %x5, target-flags(aarch64-pageoff) @g2 + + bb.6: + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: + ; CHECK-NEXT: %X5 = ADRP + ; CHECK-NEXT: %X6 = LDRXui %X5, + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGot: + ; CHECK-NEXT: %X4 = ADRP + ; CHECK-NEXT: %X4 = LDRXui %X4, + %x4 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + %x4 = LDRXui %x4, target-flags(aarch64-pageoff, aarch64-got) @g2 + %x5 = ADRP target-flags(aarch64-page, aarch64-got) @g2 + %x6 = LDRXui %x5, target-flags(aarch64-pageoff, aarch64-got) @g2 + + bb.7: + ; CHECK-NOT: Adding MCLOH_AdrpLdrGot: + ; This sequence makes no sense and should not produce a LdrGot + %x11 = ADRP target-flags(aarch64-page, aarch64-got) @g5 + %s11 = LDRSui %x4, target-flags(aarch64-pageoff, aarch64-got) @g5 + + bb.8: + ; CHECK-NEXT: Adding MCLOH_AdrpAddLdr: + ; CHECK-NEXT: %X7 = ADRP [TF=1] + ; CHECK-NEXT: %X8 = ADDXri %X7, + ; CHECK-NEXT: %D1 = LDRDui %X8, 8 + %x7 = ADRP target-flags(aarch64-page) @g3 + %x8 = ADDXri %x7, target-flags(aarch64-pageoff) @g3, 0 + %d1 = LDRDui %x8, 8 + + bb.9: + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X3 = ADRP + ; CHECK-NEXT: %X3 = ADDXri %X3, + ; CHECK-NEXT: Adding MCLOH_AdrpAdd: + ; CHECK-NEXT: %X5 = ADRP + ; CHECK-NEXT: %X2 = ADDXri %X5, + ; CHECK-NEXT: Adding MCLOH_AdrpAddStr: + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %X1 = ADDXri %X1, + ; CHECK-NEXT: STRXui %X2, %X1, 16 + %x1 = ADRP target-flags(aarch64-page) @g3 + %x1 = ADDXri %x1, target-flags(aarch64-pageoff) @g3, 0 + STRXui %x2, %x1, 16 + + ; This sequence should just produce an AdrpAdd (not AdrpAddStr) + %x5 = ADRP target-flags(aarch64-page) @g3 + %x2 = ADDXri %x5, target-flags(aarch64-pageoff) @g3, 0 + STRXui %x2, %x11, 16 + + ; This sequence should just produce an AdrpAdd (not AdrpAddStr) + %x3 = ADRP target-flags(aarch64-page) @g3 + %x3 = ADDXri %x3, target-flags(aarch64-pageoff) @g3, 0 + STRXui %x3, %x3, 16 + + bb.10: + ; CHECK-NEXT: Adding MCLOH_AdrpLdr: + ; CHECK-NEXT: %X2 = ADRP + ; CHECK-NEXT: %X2 = LDRXui %X2, + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotLdr: + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %X1 = LDRXui %X1, + ; CHECK-NEXT: %X1 = LDRXui %X1, 24 + %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + %x1 = LDRXui %x1, 24 + ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotLdr) + %x2 = ADRP target-flags(aarch64-page) @g3 + %x2 = LDRXui %x2, target-flags(aarch64-pageoff) @g3 + %x2 = LDRXui %x2, 24 + + bb.11: + ; CHECK-NEXT: Adding MCLOH_AdrpLdr + ; CHECK-NEXT: %X5 = ADRP + ; CHECK-NEXT: %X5 = LDRXui %X5, + ; CHECK-NEXT: Adding MCLOH_AdrpLdrGotStr: + ; CHECK-NEXT: %X1 = ADRP + ; CHECK-NEXT: %X1 = LDRXui %X1, + ; CHECK-NEXT: STRXui %X4, %X1, 32 + %x1 = ADRP target-flags(aarch64-page, aarch64-got) @g4 + %x1 = LDRXui %x1, target-flags(aarch64-pageoff, aarch64-got) @g4 + STRXui %x4, %x1, 32 + ; Should just produce a MCLOH_AdrpLdr (not MCLOH_AdrpLdrGotStr) + %x5 = ADRP target-flags(aarch64-page) @g1 + %x5 = LDRXui %x5, target-flags(aarch64-pageoff) @g1 + STRXui %x11, %x5, 32 + + bb.12: + successors: %bb.13 + ; Cannot produce a LOH for multiple users + ; CHECK-NOT: MCLOH_AdrpAdd + %x10 = ADRP target-flags(aarch64-page) @g0 + HINT 0, implicit def dead %x10 ; clobbers x10 + %x11 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 + B %bb.13 + + bb.13: + liveins: %x10 + %x12 = ADDXri %x10, target-flags(aarch64-pageoff) @g0, 0 +...