Index: include/llvm/CodeGen/ScheduleDAG.h
===================================================================
--- include/llvm/CodeGen/ScheduleDAG.h
+++ include/llvm/CodeGen/ScheduleDAG.h
@@ -413,6 +413,17 @@
     /// specified node.
     bool addPred(const SDep &D, bool Required = true);
 
+    /// addPredBarrier - This adds a barrier edge to SU by calling
+    /// addPred(), with latency 0 generally, or latency 1 for a store
+    /// followed by a load.
+    bool addPredBarrier(SUnit *SU) {
+      SDep Dep(SU, SDep::Barrier);
+      unsigned TrueMemOrderLatency =
+        ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0);
+      Dep.setLatency(TrueMemOrderLatency);
+      return addPred(Dep);
+    }
+
     /// removePred - This removes the specified edge as a pred of the current
     /// node if it exists.  It also removes the current node as a successor of
     /// the specified node.
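For review context, a minimal usage sketch of the new helper (SUStore, SULoad and SUStore2 are hypothetical SUnits, not names from this patch):

    // Make SUStore a barrier predecessor of SULoad. Because the
    // predecessor stores and the successor loads, the edge gets the
    // one-cycle TrueMemOrderLatency; any other combination gets 0.
    SULoad->addPredBarrier(SUStore);

    // Store followed by store: plain ordering edge, latency 0.
    SUStore2->addPredBarrier(SUStore);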
Index: include/llvm/CodeGen/ScheduleDAGInstrs.h
===================================================================
--- include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -15,12 +15,14 @@
 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SparseMultiSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include <list>
 
 namespace llvm {
   class MachineFrameInfo;
@@ -71,6 +73,10 @@
   /// vreg use list.
   typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2UseMap;
 
+  typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+  typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
+      UnderlyingObjectsVector;
+
   /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
   /// MachineInstrs.
   class ScheduleDAGInstrs : public ScheduleDAG {
@@ -135,10 +141,152 @@
     /// Track the last instruction in this region defining each virtual register.
     VReg2SUnitMap VRegDefs;
 
-    /// PendingLoads - Remember where unknown loads are after the most recent
-    /// unknown store, as we iterate. As with Defs and Uses, this is here
-    /// to minimize construction/destruction.
-    std::vector<SUnit *> PendingLoads;
+    AliasAnalysis *AAForDep;
+
+    /// A list of SUnits, used in Value2SUsMap, during DAG construction.
+    class SUList : public std::list<SUnit *> {
+      /// This SUnit reaches all nodes that have been pruned from this
+      /// list by a call to reduce().
+      SUnit *BarrierSU;
+
+      /// Chain dependencies used to enforce memory order should have
+      /// latency of 0 (except for the true dependency of a store
+      /// followed by an aliased load... we estimate that with a single
+      /// cycle of latency, assuming the hardware will bypass).
+      unsigned TrueMemOrderLatency;
+
+    public:
+      SUList(unsigned lat = 0) : BarrierSU(nullptr), TrueMemOrderLatency(lat) {}
+
+      bool hasBarrierSU() const { return BarrierSU != nullptr; }
+      SUnit *getBarrierSU() const { return BarrierSU; }
+
+      unsigned getTrueMemOrderLatency() const { return TrueMemOrderLatency; }
+
+      /// Reduce the size of this list by popping from the front (FIFO)
+      /// until newSize is reached. The popped nodes are made
+      /// successors of the BarrierSU, which is the last node popped.
+      void reduce(unsigned newSize);
+
+      /// Clear all previously added SUs from this list.
+      void clear() {
+        BarrierSU = nullptr;
+        std::list<SUnit *>::clear();
+      }
+
+      void dump() const;
+    };
+
+    /// A map from ValueType to SUList, used during DAG construction as
+    /// a means of remembering which SUs depend on which memory
+    /// locations.
+    class Value2SUsMap : public MapVector<ValueType, SUList> {
+      /// Current total number of SUs in map.
+      unsigned NumNodes;
+
+      /// If NumNodes reaches MaxNumNodes, the SU lists will be reduced
+      /// as needed.
+      void reduce(unsigned newSize);
+
+      /// 1 for loads, 0 for stores. (See comment in SUList.)
+      unsigned TrueMemOrderLatency;
+
+    public:
+      Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+
+      /// To keep NumNodes up to date, addToMap() must be used instead
+      /// of this operator followed by push_back().
+      ValueType &operator[](const SUList &Key) {
+        llvm_unreachable("Don't use. Use addToMap() instead.");
+      }
+
+      /// Add SU to the SUList of V. If the map grows huge, reduce its
+      /// size by calling reduce().
+      void addToMap(SUnit *SU, ValueType V);
+
+      /// Clears the list of SUs mapped to V.
+      void clearList(ValueType V) {
+        iterator Itr = find(V);
+        if (Itr != end()) {
+          assert(NumNodes >= Itr->second.size());
+          NumNodes -= Itr->second.size();
+
+          Itr->second.clear();
+        }
+      }
+
+      /// Clears the map of all contents.
+      void clear() {
+        MapVector<ValueType, SUList>::clear();
+        NumNodes = 0;
+      }
+
+      unsigned getTrueMemOrderLatency() const { return TrueMemOrderLatency; }
+    };
+
+    /// Add a chain edge between SUa and SUb, but only if neither
+    /// AliasAnalysis nor the Target can disprove the dependency.
+    void addChainDependency(SUnit *SUa, SUnit *SUb,
+                            unsigned Latency = 0);
+
+    /// Add dependencies as needed from all SUs in the list to SU.
+    void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) {
+      for (auto *su : sus)
+        addChainDependency(SU, su, Latency);
+
+      // If this SUList has been reduced, its BarrierSU must also be
+      // considered.
+      if (sus.hasBarrierSU() && sus.getBarrierSU() != SU)
+        sus.getBarrierSU()->addPredBarrier(SU);
+    }
+
+    /// Add dependencies as needed from all SUs in the list to SU, with
+    /// the list's own TrueMemOrderLatency value.
+    void addChainDependencies(SUnit *SU, SUList &sus) {
+      addChainDependencies(SU, sus, sus.getTrueMemOrderLatency());
+    }
+
+    /// Add dependencies as needed from all SUs in the map to SU.
+    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap) {
+      for (auto &I : Val2SUsMap)
+        addChainDependencies(SU, I.second,
+                             Val2SUsMap.getTrueMemOrderLatency());
+    }
+
+    /// Add dependencies as needed to SU, from all SUs mapped to V.
+    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
+                              ValueType V) {
+      Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
+      if (Itr != Val2SUsMap.end())
+        addChainDependencies(SU, Itr->second,
+                             Val2SUsMap.getTrueMemOrderLatency());
+    }
+
+    /// Add barrier chains from all SUs in the list to BarrierSU, and
+    /// then clear the list. Called from addBarrierChainsAndClearMap().
+    void addBarrierChains(SUnit *BarrierSU, SUList &sus) {
+      for (auto *su : sus)
+        su->addPredBarrier(BarrierSU);
+
+      if (sus.hasBarrierSU())
+        sus.getBarrierSU()->addPredBarrier(BarrierSU);
+
+      sus.clear();
+    }
+
+    /// Add barrier chains from all SUs in the map to BarrierSU, and
+    /// then clear the map.
+    void addBarrierChainsAndClearMap(SUnit *BarrierSU,
+                                     Value2SUsMap &Val2SUsMap) {
+      for (auto &I : Val2SUsMap)
+        addBarrierChains(BarrierSU, I.second);
+
+      Val2SUsMap.clear();
+    }
+
+    /// For an unanalyzable memory access, this Value is used in maps.
+    UndefValue *UnknownValue;
 
     /// DbgValues - Remember instruction that precedes DBG_VALUE.
     /// These are generated by buildSchedGraph but persist so they can be
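A hedged sketch of how the new containers are meant to interact during construction (V and NewSU are illustrative placeholders; the calls assume we are inside ScheduleDAGInstrs, where addChainDependencies() is visible):

    Value2SUsMap Stores;    // store->store ordering, latency 0
    Value2SUsMap Loads(1);  // store->load ordering, latency 1

    // Record SU as an access to V. If the total node count exceeds
    // -max-build-sched-nodes, the map reduces its biggest lists and
    // remembers the pruned nodes behind a per-list BarrierSU.
    Stores.addToMap(SU, V);

    // Later store to V: fan out edges against everything recorded,
    // plus one barrier edge to the list's BarrierSU, if any.
    addChainDependencies(NewSU, Stores, V);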
Index: lib/CodeGen/ScheduleDAGInstrs.cpp
===================================================================
--- lib/CodeGen/ScheduleDAGInstrs.cpp
+++ lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,7 +13,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -26,6 +25,8 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
@@ -49,13 +50,99 @@
 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
     cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
 
+static cl::opt<unsigned> MaxNumNodes("max-build-sched-nodes", cl::Hidden,
+    cl::init(500), cl::desc("Limit the number of SUs in each map in "
+                            "buildSchedGraph()"));
+
+void ScheduleDAGInstrs::SUList::dump() const {
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  if (BarrierSU != nullptr)
+    dbgs() << "BarrierSU: SU(" << BarrierSU->NodeNum << ") + ";
+  dbgs() << "{ ";
+  for (auto *su : *this) {
+    dbgs() << "SU(" << su->NodeNum << ")";
+    if (su != back())
+      dbgs() << ", ";
+  }
+  dbgs() << "}\n";
+#endif
+}
+
+void ScheduleDAGInstrs::SUList::reduce(unsigned newSize) {
+  assert(size() > newSize);
+  unsigned toRemove = size() - newSize;
+
+  DEBUG(dbgs() << "SUList reduced from: "; dump(););
+
+  // Remove the elements from the beginning of the list (FIFO), and
+  // make the last element removed the new BarrierSU.
+  iterator I = begin(), NewBarrierItr = begin();
+  std::advance(NewBarrierItr, toRemove - 1);
+  while (I != NewBarrierItr) {
+    (*(I++))->addPredBarrier(*NewBarrierItr);
+    pop_front();
+  }
+
+  // Handle any old BarrierSU, and set the new BarrierSU.
+  if (hasBarrierSU())
+    BarrierSU->addPredBarrier(*NewBarrierItr);
+  BarrierSU = *NewBarrierItr;
+  pop_front(); // pop NewBarrierItr
+
+  DEBUG(dbgs() << "to: "; dump(););
+}
+
+void ScheduleDAGInstrs::Value2SUsMap::addToMap(SUnit *SU, ValueType V) {
+  MapVector<ValueType, SUList>::operator[](V).push_back(SU);
+  if (++NumNodes > MaxNumNodes)
+    reduce(MaxNumNodes * 3 / 4);
+}
+
+void ScheduleDAGInstrs::Value2SUsMap::reduce(unsigned newSize) {
+  assert(NumNodes > newSize);
+
+  DEBUG(dbgs() << "Reducing map, old size: " << NumNodes << "\n");
+
+  // Sort the lists by size.
+  std::vector<SUList *> lists;
+  for (auto &I : *this)
+    lists.push_back(&I.second);
+  std::sort(lists.begin(), lists.end(),
+            [](const SUList *A, const SUList *B) {
+              return A->size() > B->size(); });
+
+  // Reduce the lists in order of their sizes.
+  for (unsigned i = 0; i < lists.size();) {
+    if (lists[i]->size() == 0)
+      break;
+
+    // Reduce the current list, which is the biggest one in the map.
+    unsigned newListSize = lists[i]->size() * 3 / 4;
+    NumNodes -= lists[i]->size() - newListSize;
+    lists[i]->reduce(newListSize);
+
+    // Continue with the next list, unless the current one is still bigger.
+    if (i + 1 == lists.size() || lists[i]->size() <= lists[i + 1]->size())
+      ++i;
+
+    // Stop if the map has shrunk enough.
+    if (NumNodes <= newSize)
+      break;
+  }
+
+  DEBUG(dbgs() << "New size: " << NumNodes << "\n";);
+}
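A worked example of the reduction policy, assuming the default -max-build-sched-nodes=500: the 501st addToMap() call triggers reduce(375), which repeatedly shrinks the currently biggest SUList to 3/4 of its size:

    // Hypothetical list sizes when reduce(375) starts: {300, 150, 51}.
    // Pass 1: 300 -> 225, NumNodes 501 -> 426 (still > 375, and
    //         225 > 150, so stay on the same list).
    // Pass 2: 225 -> 168, NumNodes 426 -> 369 (<= 375, stop).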
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo *mli,
                                      bool IsPostRAFlag, bool RemoveKillFlags,
                                      LiveIntervals *lis)
     : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
       IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
-      CanHandleTerminators(false), FirstDbgValue(nullptr) {
+      CanHandleTerminators(false), AAForDep(nullptr),
+      UnknownValue(UndefValue::get(
+          Type::getVoidTy(mf.getFunction()->getContext()))),
+      FirstDbgValue(nullptr) {
   assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
   DbgValues.clear();
   assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -123,10 +210,6 @@
   } while (!Working.empty());
 }
 
-typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
-typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
-UnderlyingObjectsVector;
-
 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
 /// information and it can be tracked to a normal reference to a known
 /// object, return the Value for that object.
@@ -516,7 +599,12 @@
   // Cover a trivial case - no edge is needed to itself.
   if (MIa == MIb)
     return false;
 
+  // buildSchedGraph() will clear the list of stores if not using AA,
+  // which means all stores then have to be chained.
+  if (!AA && MIa->mayStore() && MIb->mayStore())
+    return true;
+
   // Let the target decide if memory accesses cannot possibly overlap.
   if ((MIa->mayLoad() || MIa->mayStore()) &&
       (MIb->mayLoad() || MIb->mayStore()))
@@ -577,105 +665,17 @@
   return (AAResult != AliasAnalysis::NoAlias);
 }
 
-/// This recursive function iterates over chain deps of SUb looking for
-/// "latest" node that needs a chain edge to SUa.
-static unsigned
-iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
-                 SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
-                 SmallPtrSetImpl<const SUnit *> &Visited) {
-  if (!SUa || !SUb || SUb == ExitSU)
-    return *Depth;
-
-  // Remember visited nodes.
-  if (!Visited.insert(SUb).second)
-    return *Depth;
-  // If there is _some_ dependency already in place, do not
-  // descend any further.
-  // TODO: Need to make sure that if that dependency got eliminated or ignored
-  // for any reason in the future, we would not violate DAG topology.
-  // Currently it does not happen, but makes an implicit assumption about
-  // future implementation.
-  //
-  // Independently, if we encounter node that is some sort of global
-  // object (like a call) we already have full set of dependencies to it
-  // and we can stop descending.
-  if (SUa->isSucc(SUb) ||
-      isGlobalMemoryObject(AA, SUb->getInstr()))
-    return *Depth;
-
-  // If we do need an edge, or we have exceeded depth budget,
-  // add that edge to the predecessors chain of SUb,
-  // and stop descending.
-  if (*Depth > 200 ||
-      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
-    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
-    return *Depth;
-  }
-  // Track current depth.
-  (*Depth)++;
-  // Iterate over memory dependencies only.
-  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
-       I != E; ++I)
-    if (I->isNormalMemoryOrBarrier())
-      iterateChainSucc(AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
-  return *Depth;
-}
-
-/// This function assumes that "downward" from SU there exist
-/// tail/leaf of already constructed DAG. It iterates downward and
-/// checks whether SU can be aliasing any node dominated
-/// by it.
-static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
-                            SUnit *SU, SUnit *ExitSU,
-                            std::set<SUnit *> &CheckList,
-                            unsigned LatencyToLoad) {
-  if (!SU)
-    return;
-
-  SmallPtrSet<const SUnit *, 16> Visited;
-  unsigned Depth = 0;
-
-  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
-       I != IE; ++I) {
-    if (SU == *I)
-      continue;
-    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
-      SDep Dep(SU, SDep::MayAliasMem);
-      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
-      (*I)->addPred(Dep);
-    }
-
-    // Iterate recursively over all previously added memory chain
-    // successors. Keep track of visited nodes.
-    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
-           JE = (*I)->Succs.end(); J != JE; ++J)
-      if (J->isNormalMemoryOrBarrier())
-        iterateChainSucc(AA, MFI, SU, J->getSUnit(),
-                         ExitSU, &Depth, Visited);
-  }
-}
-
 /// Check whether two objects need a chain edge, if so, add it
-/// otherwise remember the rejected SU.
-static inline
-void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI,
-                         SUnit *SUa, SUnit *SUb,
-                         std::set<SUnit *> &RejectList,
-                         unsigned TrueMemOrderLatency = 0,
-                         bool isNormalMemory = false) {
-  // If this is a false dependency,
-  // do not add the edge, but rememeber the rejected node.
-  if (MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
-    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
-    Dep.setLatency(TrueMemOrderLatency);
+/// and do nothing otherwise.
+void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
+                                           unsigned Latency) {
+  // If this is a false dependency, do not add the edge.
+  if (MIsNeedChainEdge(AAForDep, MFI, SUa->getInstr(), SUb->getInstr())) {
+    SDep Dep(SUa, SDep::MayAliasMem);
+    Dep.setLatency(Latency);
     SUb->addPred(Dep);
   }
-  else {
-    // Duplicate entries should be ignored.
-    RejectList.insert(SUb);
-    DEBUG(dbgs() << "\tReject chain dep between SU("
-          << SUa->NodeNum << ") and SU("
-          << SUb->NodeNum << ")\n");
-  }
 }
 
 /// Create an SUnit for each real instruction, numbered in top-down
 /// topological order.
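For comparison with the deleted RejectList/adjustChainDeps() machinery, a hedged sketch of how the rewritten helper is driven for a store SU whose memory operand analyzes to underlying objects V1 and V2 (illustrative names):

    addChainDependencies(SU, Stores, V1); // edges vs. earlier stores to V1
    addChainDependencies(SU, Loads,  V1); // edges vs. earlier loads of V1
    addChainDependencies(SU, Stores, V2);
    addChainDependencies(SU, Loads,  V2);
    // Each call bottoms out in addChainDependency(), which asks
    // MIsNeedChainEdge() before creating a MayAliasMem edge.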
@@ -745,7 +745,7 @@
   const TargetSubtargetInfo &ST = MF.getSubtarget();
   bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                        : ST.useAA();
-  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
+  AAForDep = UseAA ? AA : nullptr;
 
   MISUnitMap.clear();
   ScheduleDAG::clearDAG();
@@ -759,16 +759,26 @@
   // We build scheduling units by walking a block's instruction list from bottom
   // to top.
 
-  // Remember where a generic side-effecting instruction is as we procede.
-  SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
-
-  // Memory references to specific known memory locations are tracked
-  // so that they can be given more precise dependencies. We track
-  // separately the known memory locations that may alias and those
-  // that are known not to alias
-  MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
-  MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
-  std::set<SUnit *> RejectMemNodes;
+  // Remember where a generic side-effecting instruction is as we
+  // proceed. No SU is ever scheduled across this SU.
+  SUnit *BarrierChain = nullptr;
+
+  // Each MI's memory operand(s) is analyzed to a list of underlying
+  // objects. The SU is then inserted in the SUList(s) mapped from
+  // each such Value. Each Value thus gets mapped to a list of SUs
+  // depending on it, with defs and uses kept separately. Two SUs are
+  // non-aliasing if they depend exclusively on different Values.
+  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
+
+  // Certain memory accesses are known to not alias any SU in Stores
+  // or Loads, and therefore have their own 'NonAlias' domain. E.g.
+  // spill / reload instructions never alias LLVM IR Values. It is
+  // assumed that this type of memory access always has properly
+  // modeled memory operands and is therefore never unanalyzable; such
+  // accesses are non-aliasing against all nodes in Stores and Loads,
+  // including the unanalyzable ones.
+  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
 
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
@@ -850,203 +860,113 @@
       ExitSU.addPred(Dep);
     }
 
-    // Add chain dependencies.
-    // Chain dependencies used to enforce memory order should have
-    // latency of 0 (except for true dependency of Store followed by
-    // aliased Load... we estimate that with a single cycle of latency
-    // assuming the hardware will bypass)
-    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+    // Add memory dependencies.
+    // Note: isStoreToStackSlot and isLoadFromStackSlot are not usable
     // after stack slots are lowered to actual addresses.
-    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
-    // produce more precise dependence information.
-    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
     if (isGlobalMemoryObject(AA, MI)) {
-      // Be conservative with these and add dependencies on all memory
-      // references, even those that are known to not alias.
-      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
-          I->second[i]->addPred(SDep(SU, SDep::Barrier));
-        }
-      }
-      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
-          SDep Dep(SU, SDep::Barrier);
-          Dep.setLatency(TrueMemOrderLatency);
-          I->second[i]->addPred(Dep);
-        }
-      }
-      // Add SU to the barrier chain.
+      // This is a barrier event that acts as a pivotal node in the
+      // DAG. Add dependencies against everything below it, and clear
+      // the lists.
+      addBarrierChainsAndClearMap(SU, Stores);
+      addBarrierChainsAndClearMap(SU, Loads);
+      addBarrierChainsAndClearMap(SU, NonAliasStores);
+      addBarrierChainsAndClearMap(SU, NonAliasLoads);
+
+      // Become the new BarrierChain:
       if (BarrierChain)
-        BarrierChain->addPred(SDep(SU, SDep::Barrier));
+        BarrierChain->addPredBarrier(SU);
       BarrierChain = SU;
 
-      // This is a barrier event that acts as a pivotal node in the DAG,
-      // so it is safe to clear list of exposed nodes.
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
-                      TrueMemOrderLatency);
-      RejectMemNodes.clear();
-      NonAliasMemDefs.clear();
-      NonAliasMemUses.clear();
-
-      // fall-through
-    new_alias_chain:
-      // Chain all possibly aliasing memory references through SU.
-      if (AliasChain) {
-        unsigned ChainLatency = 0;
-        if (AliasChain->getInstr()->mayLoad())
-          ChainLatency = TrueMemOrderLatency;
-        addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes,
-                           ChainLatency);
-      }
-      AliasChain = SU;
-      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-        addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
-                           TrueMemOrderLatency);
-      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-             AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-          addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
-      }
-      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
-        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-          addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
-                             TrueMemOrderLatency);
-      }
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
-                      TrueMemOrderLatency);
-      PendingLoads.clear();
-      AliasMemDefs.clear();
-      AliasMemUses.clear();
-    } else if (MI->mayStore()) {
-      // Add dependence on barrier chain, if needed.
-      // There is no point to check aliasing on barrier event. Even if
-      // SU and barrier _could_ be reordered, they should not. In addition,
-      // we have lost all RejectMemNodes below barrier.
-      if (BarrierChain)
-        BarrierChain->addPred(SDep(SU, SDep::Barrier));
+      continue;
+    }
+
+    // If it's not a store or a non-invariant load, we're done.
+    if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
+      continue;
+
+    // Always add a dependency edge to BarrierChain if present.
+    if (BarrierChain)
+      BarrierChain->addPredBarrier(SU);
 
-      UnderlyingObjectsVector Objs;
-      getUnderlyingObjectsForInstr(MI, MFI, Objs);
+    // Find the underlying objects for MI. The Objs vector is either
+    // empty, or filled with the Values of memory locations which this
+    // SU depends on. An empty vector means the memory location is
+    // unknown, and may alias anything except NonAlias nodes. Such
+    // nodes are mapped to 'UnknownValue'.
+    UnderlyingObjectsVector Objs;
+    getUnderlyingObjectsForInstr(MI, MFI, Objs);
 
+    if (MI->mayStore()) {
       if (Objs.empty()) {
-        // Treat all other stores conservatively.
-        goto new_alias_chain;
+        // An unknown store depends on all stores and loads, except
+        // NonAliasStores and NonAliasLoads.
+        addChainDependencies(SU, Stores);
+        addChainDependencies(SU, Loads);
+
+        // If we're not using AA, clear the Stores map, since all stores
+        // will be chained.
+        if (!AAForDep)
+          Stores.clear();
+
+        Stores.addToMap(SU, UnknownValue);
+        continue;
       }
 
+      // Add precise dependencies against all previously seen memory
+      // accesses mapped to the same Value(s).
       bool MayAlias = false;
-      for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
-           K != KE; ++K) {
-        ValueType V = K->getPointer();
-        bool ThisMayAlias = K->getInt();
+      for (auto &UnderlObj : Objs) {
+        ValueType V = UnderlObj.getPointer();
+        bool ThisMayAlias = UnderlObj.getInt();
         if (ThisMayAlias)
           MayAlias = true;
 
-        // A store to a specific PseudoSourceValue. Add precise dependencies.
-        // Record the def in MemDefs, first adding a dep if there is
-        // an existing def.
-        MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-          ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
-        MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
-          ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
-        if (I != IE) {
-          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-            addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
-                               0, true);
-
-          // If we're not using AA, then we only need one store per object.
-          if (!AAForDep)
-            I->second.clear();
-          I->second.push_back(SU);
-        } else {
-          if (ThisMayAlias) {
-            if (!AAForDep)
-              AliasMemDefs[V].clear();
-            AliasMemDefs[V].push_back(SU);
-          } else {
-            if (!AAForDep)
-              NonAliasMemDefs[V].clear();
-            NonAliasMemDefs[V].push_back(SU);
-          }
-        }
-        // Handle the uses in MemUses, if there are any.
-        MapVector<ValueType, std::vector<SUnit *> >::iterator J =
-          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
-        MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
-          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
-        if (J != JE) {
-          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
-            addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes,
-                               TrueMemOrderLatency, true);
-          J->second.clear();
-        }
+        Value2SUsMap &StoresMap = (ThisMayAlias ? Stores : NonAliasStores);
+
+        // Add dependencies to previous stores and loads mapped to V.
+        addChainDependencies(SU, StoresMap, V);
+        addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
+
+        // If we're not using AA, then we only need one store per object.
+        if (!AAForDep)
+          StoresMap.clearList(V);
+
+        // Map this store to V.
+        StoresMap.addToMap(SU, V);
       }
       if (MayAlias) {
-        // Add dependencies from all the PendingLoads, i.e. loads
-        // with no underlying object.
-        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-          addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
-                             TrueMemOrderLatency);
-        // Add dependence on alias chain, if needed.
-        if (AliasChain)
-          addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
+        // The store is not 'NonAlias', and may therefore have
+        // dependencies to unanalyzable loads and stores.
+        addChainDependencies(SU, Loads, UnknownValue);
+        addChainDependencies(SU, Stores, UnknownValue);
       }
-      adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
-                      TrueMemOrderLatency);
-    } else if (MI->mayLoad()) {
-      bool MayAlias = true;
-      if (MI->isInvariantLoad(AA)) {
-        // Invariant load, no chain dependencies needed!
-      } else {
-        UnderlyingObjectsVector Objs;
-        getUnderlyingObjectsForInstr(MI, MFI, Objs);
-
-        if (Objs.empty()) {
-          // A load with no underlying object. Depend on all
-          // potentially aliasing stores.
-          for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
-            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-              addChainDependency(AAForDep, MFI, SU, I->second[i],
-                                 RejectMemNodes);
-
-          PendingLoads.push_back(SU);
-          MayAlias = true;
-        } else {
-          MayAlias = false;
-        }
-        for (UnderlyingObjectsVector::iterator
-               J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
-          ValueType V = J->getPointer();
-          bool ThisMayAlias = J->getInt();
-
-          if (ThisMayAlias)
-            MayAlias = true;
-
-          // A load from a specific PseudoSourceValue. Add precise dependencies.
-          MapVector<ValueType, std::vector<SUnit *> >::iterator I =
-            ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
-          MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
-            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+    } else { // SU is a load.
+      if (Objs.empty()) {
+        // An unknown load depends on all stores, except NonAliasStores.
+        addChainDependencies(SU, Stores);
+
+        Loads.addToMap(SU, UnknownValue);
+        continue;
+      }
-          if (I != IE)
-            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
-              addChainDependency(AAForDep, MFI, SU, I->second[i],
-                                 RejectMemNodes, 0, true);
-          if (ThisMayAlias)
-            AliasMemUses[V].push_back(SU);
-          else
-            NonAliasMemUses[V].push_back(SU);
-        }
-        if (MayAlias)
-          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
-        // Add dependencies on alias and barrier chains, if needed.
-        if (MayAlias && AliasChain)
-          addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes);
-        if (BarrierChain)
-          BarrierChain->addPred(SDep(SU, SDep::Barrier));
-      }
+
+      bool MayAlias = false;
+      for (auto &UnderlObj : Objs) {
+        ValueType V = UnderlObj.getPointer();
+        bool ThisMayAlias = UnderlObj.getInt();
+        if (ThisMayAlias)
+          MayAlias = true;
+
+        // Add precise dependencies against all previously seen stores
+        // mapped to the same Value(s).
+        addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
+
+        // Map this load to V.
+        (ThisMayAlias ? Loads : NonAliasLoads).addToMap(SU, V);
+      }
+      if (MayAlias)
+        // The load is not 'NonAlias', and may therefore have
+        // dependencies to unanalyzable stores.
+        addChainDependencies(SU, Stores, UnknownValue);
     }
   }
   if (DbgMI)
@@ -1055,7 +975,6 @@
   Defs.clear();
   Uses.clear();
   VRegDefs.clear();
-  PendingLoads.clear();
 }
 
 /// \brief Initialize register live-range state for updating kills.
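Finally, a compressed sketch of the resulting bottom-up scheme in buildSchedGraph() (simplified control flow under the definitions above, not the verbatim implementation):

    for (MachineInstr *MI : make_range(BB->rbegin(), BB->rend())) {
      if (isGlobalMemoryObject(AA, MI)) {
        // Chain all four maps to SU, clear them, become BarrierChain.
      } else if (MI->mayStore()) {
        // Per underlying object V: edges vs. Stores[V] and Loads[V],
        // then Stores.addToMap(SU, V). An unknown V is recorded as
        // UnknownValue and aliases all of Stores and Loads.
      } else if (MI->mayLoad() && !MI->isInvariantLoad(AA)) {
        // Per underlying object V: edges vs. Stores[V], then
        // Loads.addToMap(SU, V).
      }
    }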