Index: llvm/trunk/include/llvm/Analysis/MemorySSA.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/MemorySSA.h
+++ llvm/trunk/include/llvm/Analysis/MemorySSA.h
@@ -824,7 +824,8 @@
                              InsertionPlace);
   void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
                              AccessList::iterator);
-  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *);
+  MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
+                                      const MemoryUseOrDef *Template = nullptr);
 
 private:
   class CachingWalker;
@@ -845,7 +846,8 @@
   void markUnreachableAsLiveOnEntry(BasicBlock *BB);
   bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
   MemoryPhi *createMemoryPhi(BasicBlock *BB);
-  MemoryUseOrDef *createNewAccess(Instruction *);
+  MemoryUseOrDef *createNewAccess(Instruction *,
+                                  const MemoryUseOrDef *Template = nullptr);
   MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
   void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
   MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
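Note: the new Template parameter lets a caller that is cloning an instruction
reuse the original access's MemoryDef/MemoryUse classification instead of
re-querying alias analysis. A minimal sketch of the intended call pattern
(illustrative only, not part of the patch; OrigI, ClonedI and DefiningAcc are
hypothetical names inside a cloning utility):

    // When ClonedI is an exact clone of OrigI, the original access can serve
    // as a template; createNewAccess then skips the AA query (the
    // classification is still re-checked in asserts builds, see below).
    MemoryUseOrDef *OrigAcc = MSSA->getMemoryAccess(OrigI);
    MemoryUseOrDef *NewAcc =
        MSSA->createDefinedAccess(ClonedI, DefiningAcc, /*Template=*/OrigAcc);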
Index: llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h
+++ llvm/trunk/include/llvm/Analysis/MemorySSAUpdater.h
@@ -35,8 +35,10 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/OperandTraits.h"
@@ -45,6 +47,7 @@
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueMap.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -57,6 +60,12 @@
 class LLVMContext;
 class raw_ostream;
 
+using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
+using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>;
+using CFGUpdate = cfg::Update<BasicBlock *>;
+using GraphDiffInvBBPair =
+    std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>;
+
 class MemorySSAUpdater {
 private:
   MemorySSA *MSSA;
@@ -70,6 +79,7 @@
 
 public:
   MemorySSAUpdater(MemorySSA *MSSA) : MSSA(MSSA) {}
+
   /// Insert a definition into the MemorySSA IR. RenameUses will rename any use
   /// below the new def block (and any inserted phis). RenameUses should be set
   /// to true if the definition may cause new aliases for loads below it. This
@@ -89,6 +99,39 @@
   /// Where a mayalias b, *does* require RenameUses be set to true.
   void insertDef(MemoryDef *Def, bool RenameUses = false);
   void insertUse(MemoryUse *Use);
+  /// Update the MemoryPhi in `To` following an edge deletion between `From`
+  /// and `To`. If `To` becomes unreachable, a call to removeBlocks should be
+  /// made.
+  void removeEdge(BasicBlock *From, BasicBlock *To);
+  /// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
+  /// following a CFG change that replaced multiple edges (switch) with a
+  /// direct branch.
+  void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+  /// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
+  /// the exit blocks and a 1:1 mapping of all blocks and instructions
+  /// cloned. This involves duplicating all defs and uses in the cloned blocks.
+  /// Updating phi nodes in exit block successors is done separately.
+  void updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+                           ArrayRef<BasicBlock *> ExitBlocks,
+                           const ValueToValueMapTy &VM,
+                           bool IgnoreIncomingWithNoClones = false);
+  // Block BB was fully or partially cloned into its predecessor P1. Map
+  // contains the 1:1 mapping of instructions cloned and VM[BB]=P1.
+  void updateForClonedBlockIntoPred(BasicBlock *BB, BasicBlock *P1,
+                                    const ValueToValueMapTy &VM);
+  /// Update phi nodes in exit block successors following cloning. Exit blocks
+  /// that were not cloned don't have additional predecessors added.
+  void updateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+                                     const ValueToValueMapTy &VMap,
+                                     DominatorTree &DT);
+  void updateExitBlocksForClonedLoop(
+      ArrayRef<BasicBlock *> ExitBlocks,
+      ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT);
+
+  /// Apply CFG updates, analogous with the DT edge updates.
+  void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+  /// Apply CFG insert updates, analogous with the DT edge updates.
+  void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT);
+
   void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where);
   void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where);
   void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB,
@@ -219,6 +262,23 @@
   template <class RangeType>
   MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
   void fixupDefs(const SmallVectorImpl<WeakVH> &);
+  // Clone all uses and defs from BB to NewBB given a 1:1 map of all
+  // instructions and blocks cloned, and a map of MemoryPhi : Definition
+  // (MemoryAccess Phi or Def). VMap maps old instructions to cloned
+  // instructions and old blocks to cloned blocks. MPhiMap is created in the
+  // caller of this private method, and maps existing MemoryPhis to new
+  // definitions that new MemoryAccesses must point to. These definitions may
+  // not necessarily be MemoryPhis themselves; they may be MemoryDefs. As such,
+  // the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
+  // may be MemoryPhis or MemoryDefs and not MemoryUses.
+  void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+                        const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+  template <typename Iter>
+  void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
+                                            Iter ValuesBegin, Iter ValuesEnd,
+                                            DominatorTree &DT);
+  void applyInsertUpdates(ArrayRef<CFGUpdate>, DominatorTree &DT,
+                          const GraphDiff<BasicBlock *> *GD);
 };
 
 } // end namespace llvm
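Note: a rough sketch of how a loop-cloning transform is expected to drive the
new API, assuming it has already cloned the loop and recorded the old-to-new
mapping in VMap (variable names and the surrounding pass state are
illustrative, not part of the patch):

    LoopBlocksRPO RPOT(L); // Blocks of the original loop, in RPO order.
    RPOT.perform(&LI);
    SmallVector<BasicBlock *, 8> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    // ... clone the loop, filling VMap with old->new blocks/instructions ...
    MSSAU.updateForClonedLoop(RPOT, ExitBlocks, VMap);
    // Separately connect phis in successors of exit blocks that gained
    // predecessors through cloning.
    MSSAU.updateExitBlocksForClonedLoop(ExitBlocks, VMap, DT);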
Index: llvm/trunk/lib/Analysis/MemorySSA.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MemorySSA.cpp
+++ llvm/trunk/lib/Analysis/MemorySSA.cpp
@@ -1542,9 +1542,10 @@
 }
 
 MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I,
-                                               MemoryAccess *Definition) {
+                                               MemoryAccess *Definition,
+                                               const MemoryUseOrDef *Template) {
   assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI");
-  MemoryUseOrDef *NewAccess = createNewAccess(I);
+  MemoryUseOrDef *NewAccess = createNewAccess(I, Template);
   assert(
       NewAccess != nullptr &&
       "Tried to create a memory access for a non-memory touching instruction");
@@ -1567,7 +1568,8 @@
 }
 
 /// Helper function to create new memory accesses
-MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
+MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
+                                           const MemoryUseOrDef *Template) {
   // The assume intrinsic has a control dependency which we model by claiming
   // that it writes arbitrarily. Ignore that fake memory dependency here.
   // FIXME: Replace this special casing with a more accurate modelling of
@@ -1576,18 +1578,31 @@
     if (II->getIntrinsicID() == Intrinsic::assume)
       return nullptr;
 
-  // Find out what affect this instruction has on memory.
-  ModRefInfo ModRef = AA->getModRefInfo(I, None);
-  // The isOrdered check is used to ensure that volatiles end up as defs
-  // (atomics end up as ModRef right now anyway). Until we separate the
-  // ordering chain from the memory chain, this enables people to see at least
-  // some relative ordering to volatiles. Note that getClobberingMemoryAccess
-  // will still give an answer that bypasses other volatile loads. TODO:
-  // Separate memory aliasing and ordering into two different chains so that we
-  // can precisely represent both "what memory will this read/write/is clobbered
-  // by" and "what instructions can I move this past".
-  bool Def = isModSet(ModRef) || isOrdered(I);
-  bool Use = isRefSet(ModRef);
+  bool Def, Use;
+  if (Template) {
+    Def = dyn_cast_or_null<MemoryDef>(Template) != nullptr;
+    Use = dyn_cast_or_null<MemoryUse>(Template) != nullptr;
+#if !defined(NDEBUG)
+    ModRefInfo ModRef = AA->getModRefInfo(I, None);
+    bool DefCheck, UseCheck;
+    DefCheck = isModSet(ModRef) || isOrdered(I);
+    UseCheck = isRefSet(ModRef);
+    assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template");
+#endif
+  } else {
+    // Find out what effect this instruction has on memory.
+    ModRefInfo ModRef = AA->getModRefInfo(I, None);
+    // The isOrdered check is used to ensure that volatiles end up as defs
+    // (atomics end up as ModRef right now anyway). Until we separate the
+    // ordering chain from the memory chain, this enables people to see at least
+    // some relative ordering to volatiles. Note that getClobberingMemoryAccess
+    // will still give an answer that bypasses other volatile loads. TODO:
+    // Separate memory aliasing and ordering into two different chains so that
+    // we can precisely represent both "what memory will this read/write/is
+    // clobbered by" and "what instructions can I move this past".
+    Def = isModSet(ModRef) || isOrdered(I);
+    Use = isRefSet(ModRef);
+  }
 
   // It's possible for an instruction to not modify memory at all. During
   // construction, we ignore them.
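Note: the isOrdered path above deliberately models volatile loads as defs. A
small illustration in the style of the unit tests below (sketch only; the
fixture names B, PointerArg, Analyses and setupAnalyses are assumed):

    // A volatile load is classified as a MemoryDef because isOrdered(I)
    // holds, even though AA reports it as a plain Ref.
    LoadInst *LV = B.CreateLoad(PointerArg);
    LV->setVolatile(true);
    setupAnalyses();
    MemorySSA &MSSA = *Analyses->MSSA;
    EXPECT_TRUE(isa<MemoryDef>(MSSA.getMemoryAccess(LV)));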
Index: llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp
===================================================================
--- llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp
+++ llvm/trunk/lib/Analysis/MemorySSAUpdater.cpp
@@ -12,7 +12,9 @@
 //===----------------------------------------------------------------===//
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
@@ -392,6 +394,522 @@
   }
 }
 
+void MemorySSAUpdater::removeEdge(BasicBlock *From, BasicBlock *To) {
+  if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+    MPhi->unorderedDeleteIncomingBlock(From);
+    if (MPhi->getNumIncomingValues() == 1)
+      removeMemoryAccess(MPhi);
+  }
+}
+
+void MemorySSAUpdater::removeDuplicatePhiEdgesBetween(BasicBlock *From,
+                                                      BasicBlock *To) {
+  if (MemoryPhi *MPhi = MSSA->getMemoryAccess(To)) {
+    bool Found = false;
+    MPhi->unorderedDeleteIncomingIf([&](const MemoryAccess *, BasicBlock *B) {
+      if (From != B)
+        return false;
+      if (Found)
+        return true;
+      Found = true;
+      return false;
+    });
+    if (MPhi->getNumIncomingValues() == 1)
+      removeMemoryAccess(MPhi);
+  }
+}
+
+void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
+                                        const ValueToValueMapTy &VMap,
+                                        PhiToDefMap &MPhiMap) {
+  auto GetNewDefiningAccess = [&](MemoryAccess *MA) -> MemoryAccess * {
+    MemoryAccess *InsnDefining = MA;
+    if (MemoryUseOrDef *DefMUD = dyn_cast<MemoryUseOrDef>(InsnDefining)) {
+      if (!MSSA->isLiveOnEntryDef(DefMUD)) {
+        Instruction *DefMUDI = DefMUD->getMemoryInst();
+        assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+        if (Instruction *NewDefMUDI =
+                cast_or_null<Instruction>(VMap.lookup(DefMUDI)))
+          InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
+      }
+    } else {
+      MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
+      if (MemoryAccess *NewDefPhi = MPhiMap.lookup(DefPhi))
+        InsnDefining = NewDefPhi;
+    }
+    assert(InsnDefining && "Defining instruction cannot be nullptr.");
+    return InsnDefining;
+  };
+
+  const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+  if (!Acc)
+    return;
+  for (const MemoryAccess &MA : *Acc) {
+    if (const MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&MA)) {
+      Instruction *Insn = MUD->getMemoryInst();
+      // Entry does not exist if the clone of the block did not clone all
+      // instructions. This occurs in LoopRotate when cloning instructions
+      // from the old header to the old preheader. The cloned instruction may
+      // also be a simplified Value, not an Instruction (see LoopRotate).
+      if (Instruction *NewInsn =
+              dyn_cast_or_null<Instruction>(VMap.lookup(Insn))) {
+        MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
+            NewInsn, GetNewDefiningAccess(MUD->getDefiningAccess()), MUD);
+        MSSA->insertIntoListsForBlock(NewUseOrDef, NewBB, MemorySSA::End);
+      }
+    }
+  }
+}
+
+void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
+                                           ArrayRef<BasicBlock *> ExitBlocks,
+                                           const ValueToValueMapTy &VMap,
+                                           bool IgnoreIncomingWithNoClones) {
+  PhiToDefMap MPhiMap;
+
+  auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
+    assert(Phi && NewPhi && "Invalid Phi nodes.");
+    BasicBlock *NewPhiBB = NewPhi->getBlock();
+    SmallPtrSet<BasicBlock *, 4> NewPhiBBPreds(pred_begin(NewPhiBB),
+                                               pred_end(NewPhiBB));
+    for (unsigned It = 0, E = Phi->getNumIncomingValues(); It < E; ++It) {
+      MemoryAccess *IncomingAccess = Phi->getIncomingValue(It);
+      BasicBlock *IncBB = Phi->getIncomingBlock(It);
+
+      if (BasicBlock *NewIncBB = cast_or_null<BasicBlock>(VMap.lookup(IncBB)))
+        IncBB = NewIncBB;
+      else if (IgnoreIncomingWithNoClones)
+        continue;
+
+      // Now we have IncBB, and will need to add incoming from it to NewPhi.
+
+      // If IncBB is not a predecessor of NewPhiBB, then do not add it.
+      // NewPhiBB was cloned without that edge.
+      if (!NewPhiBBPreds.count(IncBB))
+        continue;
+
+      // Determine incoming value and add it as incoming from IncBB.
+      if (MemoryUseOrDef *IncMUD = dyn_cast<MemoryUseOrDef>(IncomingAccess)) {
+        if (!MSSA->isLiveOnEntryDef(IncMUD)) {
+          Instruction *IncI = IncMUD->getMemoryInst();
+          assert(IncI && "Found MemoryUseOrDef with no Instruction.");
+          if (Instruction *NewIncI =
+                  cast_or_null<Instruction>(VMap.lookup(IncI))) {
+            IncMUD = MSSA->getMemoryAccess(NewIncI);
+            assert(IncMUD &&
+                   "MemoryUseOrDef cannot be null, all preds processed.");
+          }
+        }
+        NewPhi->addIncoming(IncMUD, IncBB);
+      } else {
+        MemoryPhi *IncPhi = cast<MemoryPhi>(IncomingAccess);
+        if (MemoryAccess *NewDefPhi = MPhiMap.lookup(IncPhi))
+          NewPhi->addIncoming(NewDefPhi, IncBB);
+        else
+          NewPhi->addIncoming(IncPhi, IncBB);
+      }
+    }
+  };
+
+  auto ProcessBlock = [&](BasicBlock *BB) {
+    BasicBlock *NewBlock = cast_or_null<BasicBlock>(VMap.lookup(BB));
+    if (!NewBlock)
+      return;
+
+    assert(!MSSA->getWritableBlockAccesses(NewBlock) &&
+           "Cloned block should have no accesses");
+
+    // Add MemoryPhi.
+    if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB)) {
+      MemoryPhi *NewPhi = MSSA->createMemoryPhi(NewBlock);
+      MPhiMap[MPhi] = NewPhi;
+    }
+    // Update Uses and Defs.
+    cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+  };
+
+  for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+    ProcessBlock(BB);
+
+  for (auto BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+    if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+      if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
+        FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
+}
+
+void MemorySSAUpdater::updateForClonedBlockIntoPred(
+    BasicBlock *BB, BasicBlock *P1, const ValueToValueMapTy &VM) {
+  // All defs/phis from outside BB that are used in BB are valid uses in P1,
+  // since those defs/phis must have dominated BB and therefore also dominate
+  // P1. Defs from BB being used in BB will be replaced with the cloned defs
+  // from VM. The uses of BB's Phi (if it exists) in BB will be replaced by
+  // the incoming def into the Phi from P1.
+  PhiToDefMap MPhiMap;
+  if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
+    MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
+  cloneUsesAndDefs(BB, P1, VM, MPhiMap);
+}
+
+template <typename Iter>
+void MemorySSAUpdater::privateUpdateExitBlocksForClonedLoop(
+    ArrayRef<BasicBlock *> ExitBlocks, Iter ValuesBegin, Iter ValuesEnd,
+    DominatorTree &DT) {
+  SmallVector<CFGUpdate, 4> Updates;
+  // Update/insert phis in all successors of exit blocks.
+  for (auto *Exit : ExitBlocks)
+    for (const ValueToValueMapTy *VMap : make_range(ValuesBegin, ValuesEnd))
+      if (BasicBlock *NewExit = cast_or_null<BasicBlock>(VMap->lookup(Exit))) {
+        BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
+        Updates.push_back({DT.Insert, NewExit, ExitSucc});
+      }
+  applyInsertUpdates(Updates, DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+    ArrayRef<BasicBlock *> ExitBlocks, const ValueToValueMapTy &VMap,
+    DominatorTree &DT) {
+  const ValueToValueMapTy *const Arr[] = {&VMap};
+  privateUpdateExitBlocksForClonedLoop(ExitBlocks, std::begin(Arr),
+                                       std::end(Arr), DT);
+}
+
+void MemorySSAUpdater::updateExitBlocksForClonedLoop(
+    ArrayRef<BasicBlock *> ExitBlocks,
+    ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT) {
+  auto GetPtr = [&](const std::unique_ptr<ValueToValueMapTy> &I) {
+    return I.get();
+  };
+  using MappedIteratorType =
+      mapped_iterator<const std::unique_ptr<ValueToValueMapTy> *,
+                      decltype(GetPtr)>;
+  auto MapBegin = MappedIteratorType(VMaps.begin(), GetPtr);
+  auto MapEnd = MappedIteratorType(VMaps.end(), GetPtr);
+  privateUpdateExitBlocksForClonedLoop(ExitBlocks, MapBegin, MapEnd, DT);
+}
+
+void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
+                                    DominatorTree &DT) {
+  SmallVector<CFGUpdate, 4> RevDeleteUpdates;
+  SmallVector<CFGUpdate, 4> InsertUpdates;
+  for (auto &Update : Updates) {
+    if (Update.getKind() == DT.Insert)
+      InsertUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+    else
+      RevDeleteUpdates.push_back({DT.Insert, Update.getFrom(), Update.getTo()});
+  }
+
+  if (!RevDeleteUpdates.empty()) {
+    // Update for inserted edges: use NewDT and a snapshot CFG as if the
+    // deletes had not occurred.
+    // FIXME: This creates a new DT, so mixing deletes and inserts is more
+    // expensive than inserts alone. We could do an incremental update on the
+    // DT to revert the deletes, then re-delete the edges. Teaching DT to do
+    // this is part of a pending cleanup.
+    DominatorTree NewDT(DT, RevDeleteUpdates);
+    GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+    applyInsertUpdates(InsertUpdates, NewDT, &GD);
+  } else {
+    GraphDiff<BasicBlock *> GD;
+    applyInsertUpdates(InsertUpdates, DT, &GD);
+  }
+
+  // Update for deleted edges.
+  for (auto &Update : RevDeleteUpdates)
+    removeEdge(Update.getFrom(), Update.getTo());
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+                                          DominatorTree &DT) {
+  GraphDiff<BasicBlock *> GD;
+  applyInsertUpdates(Updates, DT, &GD);
+}
+
+void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
+                                          DominatorTree &DT,
+                                          const GraphDiff<BasicBlock *> *GD) {
+  // Get recursive last Def, assuming well formed MSSA and updated DT.
+  auto GetLastDef = [&](BasicBlock *BB) -> MemoryAccess * {
+    while (true) {
+      MemorySSA::DefsList *Defs = MSSA->getWritableBlockDefs(BB);
+      // Return last Def or Phi in BB, if it exists.
+      if (Defs)
+        return &*(--Defs->end());
+
+      // Check number of predecessors, we only care if there's more than one.
+      unsigned Count = 0;
+      BasicBlock *Pred = nullptr;
+      for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+        Pred = Pair.second;
+        Count++;
+        if (Count == 2)
+          break;
+      }
+
+      // If BB has multiple predecessors, get last definition from IDom.
+      if (Count != 1) {
+        // [SimpleLoopUnswitch] If BB is a dead block about to be deleted, its
+        // DT node is invalidated. Return LiveOnEntry as its last def; it will
+        // be added to a MemoryPhi node and later deleted when the block is
+        // deleted.
+        if (!DT.getNode(BB))
+          return MSSA->getLiveOnEntryDef();
+        if (auto *IDom = DT.getNode(BB)->getIDom())
+          if (IDom->getBlock() != BB) {
+            BB = IDom->getBlock();
+            continue;
+          }
+        return MSSA->getLiveOnEntryDef();
+      } else {
+        // BB has a single predecessor, so it cannot be dead; get the last
+        // definition of Pred.
+        assert(Count == 1 && Pred && "Single predecessor expected.");
+        BB = Pred;
+      }
+    };
+    llvm_unreachable("Unable to get last definition.");
+  };
+
+  // Get nearest IDom given a set of blocks.
+  // TODO: this can be optimized by starting the search at the node with the
+  // lowest level (highest in the tree).
+  auto FindNearestCommonDominator =
+      [&](const SmallSetVector<BasicBlock *, 2> &BBSet) -> BasicBlock * {
+    BasicBlock *PrevIDom = *BBSet.begin();
+    for (auto *BB : BBSet)
+      PrevIDom = DT.findNearestCommonDominator(PrevIDom, BB);
+    return PrevIDom;
+  };
+
+  // Get all blocks that dominate PrevIDom, stop when reaching CurrIDom. Do
+  // not include CurrIDom.
+  auto GetNoLongerDomBlocks =
+      [&](BasicBlock *PrevIDom, BasicBlock *CurrIDom,
+          SmallVectorImpl<BasicBlock *> &BlocksPrevDom) {
+        if (PrevIDom == CurrIDom)
+          return;
+        BlocksPrevDom.push_back(PrevIDom);
+        BasicBlock *NextIDom = PrevIDom;
+        while (BasicBlock *UpIDom =
+                   DT.getNode(NextIDom)->getIDom()->getBlock()) {
+          if (UpIDom == CurrIDom)
+            break;
+          BlocksPrevDom.push_back(UpIDom);
+          NextIDom = UpIDom;
+        }
+      };
+
+  // Map a BB to its predecessors: added + previously existing. To get a
+  // deterministic order, store predecessors as SetVectors. The order in each
+  // will be defined by the order in Updates (fixed) and the order given by
+  // children<> (also fixed). Since we further iterate over these ordered
+  // sets, we lose the information of multiple edges possibly existing
+  // between two blocks, so we'll keep an EdgeCount map for that.
+  // An alternate implementation could keep unordered sets for the
+  // predecessors, traverse either Updates or children<> each time to get the
+  // deterministic order, and drop the usage of EdgeCount. This alternate
+  // approach would still require querying the maps for each predecessor, and
+  // the children<> call has additional computation inside for creating the
+  // snapshot-graph predecessors. As such, we favor using a little additional
+  // storage and less compute time. This decision can be revisited if we find
+  // the alternative more favorable.
+
+  struct PredInfo {
+    SmallSetVector<BasicBlock *, 2> Added;
+    SmallSetVector<BasicBlock *, 2> Prev;
+  };
+  SmallDenseMap<BasicBlock *, PredInfo> PredMap;
+
+  for (auto &Edge : Updates) {
+    BasicBlock *BB = Edge.getTo();
+    auto &AddedBlockSet = PredMap[BB].Added;
+    AddedBlockSet.insert(Edge.getFrom());
+  }
+
+  // Store all existing predecessors for each BB; at least one must exist.
+  SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, int> EdgeCountMap;
+  SmallPtrSet<BasicBlock *, 2> NewBlocks;
+  for (auto &BBPredPair : PredMap) {
+    auto *BB = BBPredPair.first;
+    const auto &AddedBlockSet = BBPredPair.second.Added;
+    auto &PrevBlockSet = BBPredPair.second.Prev;
+    for (auto &Pair : children<GraphDiffInvBBPair>({GD, BB})) {
+      BasicBlock *Pi = Pair.second;
+      if (!AddedBlockSet.count(Pi))
+        PrevBlockSet.insert(Pi);
+      EdgeCountMap[{Pi, BB}]++;
+    }
+
+    if (PrevBlockSet.empty()) {
+      assert(pred_size(BB) == AddedBlockSet.size() && "Duplicate edges added.");
+      LLVM_DEBUG(
+          dbgs()
+          << "Adding a predecessor to a block with no predecessors. "
+             "This must be an edge added to a new, likely cloned, block. "
" + "Its memory accesses must be already correct, assuming completed " + "via the updateExitBlocksForClonedLoop API. " + "Assert a single such edge is added so no phi addition or " + "additional processing is required.\n"); + assert(AddedBlockSet.size() == 1 && + "Can only handle adding one predecessor to a new block."); + // Need to remove new blocks from PredMap. Remove below to not invalidate + // iterator here. + NewBlocks.insert(BB); + } + } + // Nothing to process for new/cloned blocks. + for (auto *BB : NewBlocks) + PredMap.erase(BB); + + SmallVector BlocksToProcess; + SmallVector BlocksWithDefsToReplace; + + // First create MemoryPhis in all blocks that don't have one. Create in the + // order found in Updates, not in PredMap, to get deterministic numbering. + for (auto &Edge : Updates) { + BasicBlock *BB = Edge.getTo(); + if (PredMap.count(BB) && !MSSA->getMemoryAccess(BB)) + MSSA->createMemoryPhi(BB); + } + + // Now we'll fill in the MemoryPhis with the right incoming values. + for (auto &BBPredPair : PredMap) { + auto *BB = BBPredPair.first; + const auto &PrevBlockSet = BBPredPair.second.Prev; + const auto &AddedBlockSet = BBPredPair.second.Added; + assert(!PrevBlockSet.empty() && + "At least one previous predecessor must exist."); + + // TODO: if this becomes a bottleneck, we can save on GetLastDef calls by + // keeping this map before the loop. We can reuse already populated entries + // if an edge is added from the same predecessor to two different blocks, + // and this does happen in rotate. Note that the map needs to be updated + // when deleting non-necessary phis below, if the phi is in the map by + // replacing the value with DefP1. + SmallDenseMap LastDefAddedPred; + for (auto *AddedPred : AddedBlockSet) { + auto *DefPn = GetLastDef(AddedPred); + assert(DefPn != nullptr && "Unable to find last definition."); + LastDefAddedPred[AddedPred] = DefPn; + } + + MemoryPhi *NewPhi = MSSA->getMemoryAccess(BB); + // If Phi is not empty, add an incoming edge from each added pred. Must + // still compute blocks with defs to replace for this block below. + if (NewPhi->getNumOperands()) { + for (auto *Pred : AddedBlockSet) { + auto *LastDefForPred = LastDefAddedPred[Pred]; + for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I) + NewPhi->addIncoming(LastDefForPred, Pred); + } + } else { + // Pick any existing predecessor and get its definition. All other + // existing predecessors should have the same one, since no phi existed. + auto *P1 = *PrevBlockSet.begin(); + MemoryAccess *DefP1 = GetLastDef(P1); + + // Check DefP1 against all Defs in LastDefPredPair. If all the same, + // nothing to add. + bool InsertPhi = false; + for (auto LastDefPredPair : LastDefAddedPred) + if (DefP1 != LastDefPredPair.second) { + InsertPhi = true; + break; + } + if (!InsertPhi) { + // Since NewPhi may be used in other newly added Phis, replace all uses + // of NewPhi with the definition coming from all predecessors (DefP1), + // before deleting it. + NewPhi->replaceAllUsesWith(DefP1); + removeMemoryAccess(NewPhi); + continue; + } + + // Update Phi with new values for new predecessors and old value for all + // other predecessors. Since AddedBlockSet and PrevBlockSet are ordered + // sets, the order of entries in NewPhi is deterministic. 
+      for (auto *Pred : AddedBlockSet) {
+        auto *LastDefForPred = LastDefAddedPred[Pred];
+        for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+          NewPhi->addIncoming(LastDefForPred, Pred);
+      }
+      for (auto *Pred : PrevBlockSet)
+        for (int I = 0, E = EdgeCountMap[{Pred, BB}]; I < E; ++I)
+          NewPhi->addIncoming(DefP1, Pred);
+
+      // Insert BB in the set of blocks that now have a definition. We'll use
+      // this to compute IDF and add Phis there next.
+      BlocksToProcess.push_back(BB);
+    }
+
+    // Get all blocks that used to dominate BB and no longer do after adding
+    // AddedBlockSet, where PrevBlockSet are the previously known
+    // predecessors.
+    assert(DT.getNode(BB)->getIDom() && "BB does not have valid idom");
+    BasicBlock *PrevIDom = FindNearestCommonDominator(PrevBlockSet);
+    assert(PrevIDom && "Previous IDom should exist");
+    BasicBlock *NewIDom = DT.getNode(BB)->getIDom()->getBlock();
+    assert(NewIDom && "BB should have a new valid idom");
+    assert(DT.dominates(NewIDom, PrevIDom) &&
+           "New idom should dominate old idom");
+    GetNoLongerDomBlocks(PrevIDom, NewIDom, BlocksWithDefsToReplace);
+  }
+
+  // Compute IDF and add Phis in all IDF blocks that do not have one.
+  SmallVector<BasicBlock *, 32> IDFBlocks;
+  if (!BlocksToProcess.empty()) {
+    ForwardIDFCalculator IDFs(DT);
+    SmallPtrSet<BasicBlock *, 16> DefiningBlocks(BlocksToProcess.begin(),
+                                                 BlocksToProcess.end());
+    IDFs.setDefiningBlocks(DefiningBlocks);
+    IDFs.calculate(IDFBlocks);
+    for (auto *BBIDF : IDFBlocks) {
+      if (auto *IDFPhi = MSSA->getMemoryAccess(BBIDF)) {
+        // Update existing Phi.
+        // FIXME: some updates may be redundant, try to optimize and skip
+        // some.
+        for (unsigned I = 0, E = IDFPhi->getNumIncomingValues(); I < E; ++I)
+          IDFPhi->setIncomingValue(I, GetLastDef(IDFPhi->getIncomingBlock(I)));
+      } else {
+        IDFPhi = MSSA->createMemoryPhi(BBIDF);
+        for (auto &Pair : children<GraphDiffInvBBPair>({GD, BBIDF})) {
+          BasicBlock *Pi = Pair.second;
+          IDFPhi->addIncoming(GetLastDef(Pi), Pi);
+        }
+      }
+    }
+  }
+
+  // Now for all defs in BlocksWithDefsToReplace, if there are uses they no
+  // longer dominate, replace those with the closest dominating def.
+  // This will also update optimized accesses, as they're also uses.
+  for (auto *BlockWithDefsToReplace : BlocksWithDefsToReplace) {
+    if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
+      for (auto &DefToReplaceUses : *DefsList) {
+        BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
+        Value::use_iterator UI = DefToReplaceUses.use_begin(),
+                            E = DefToReplaceUses.use_end();
+        for (; UI != E;) {
+          Use &U = *UI;
+          ++UI;
+          MemoryAccess *Usr = dyn_cast<MemoryAccess>(U.getUser());
+          if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
+            BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
+            if (!DT.dominates(DominatingBlock, DominatedBlock))
+              U.set(GetLastDef(DominatedBlock));
+          } else {
+            BasicBlock *DominatedBlock = Usr->getBlock();
+            if (!DT.dominates(DominatingBlock, DominatedBlock)) {
+              if (auto *DomBlPhi = MSSA->getMemoryAccess(DominatedBlock))
+                U.set(DomBlPhi);
+              else {
+                auto *IDom = DT.getNode(DominatedBlock)->getIDom();
+                assert(IDom && "Block must have a valid IDom.");
+                U.set(GetLastDef(IDom->getBlock()));
+              }
+              cast<MemoryUseOrDef>(Usr)->resetOptimized();
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
 // Move What before Where in the MemorySSA IR.
 template <class WhereType>
 void MemorySSAUpdater::moveTo(MemoryUseOrDef *What, BasicBlock *BB,
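Note: the intended usage mirrors the DominatorTree incremental update API, and
the dominator tree must be updated first because applyInsertUpdates queries
it. A minimal sketch (Pred, Succ, DT and MSSAU are illustrative names; the
unit tests below follow the same pattern):

    SmallVector<CFGUpdate, 4> Updates;
    Updates.push_back({cfg::UpdateKind::Insert, Pred, Succ});
    DT.applyUpdates(Updates);              // Update the dominator tree first.
    MSSAU.applyInsertUpdates(Updates, DT); // Then update MemorySSA using it.
    // For a mix of insertions and deletions, call applyUpdates instead; it
    // splits the work and handles the deletions via removeEdge.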
Index: llvm/trunk/unittests/Analysis/MemorySSATest.cpp
===================================================================
--- llvm/trunk/unittests/Analysis/MemorySSATest.cpp
+++ llvm/trunk/unittests/Analysis/MemorySSATest.cpp
@@ -1393,3 +1393,195 @@
       (std::vector<MemoryAccess *>{StoreAAccess, StoreAAccess, StoreBAccess}));
 }
+
+// entry
+//   |
+// header
+//  /  \
+// body |
+//  \  /
+//  exit
+// header:
+//  ; 1 = MemoryDef(liveOnEntry)
+// body:
+//  ; 2 = MemoryDef(1)
+// exit:
+//  ; 3 = MemoryPhi({body, 2}, {header, 1})
+//  ; 4 = MemoryDef(3); optimized to 3, cannot optimize through the phi.
+// Insert edge: entry -> exit; check that the MemorySSA update is correct.
+TEST_F(MemorySSATest, TestAddedEdgeToBlockWithPhiNotOpt) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  Argument *PointerArg = &*F->arg_begin();
+  BasicBlock *Entry(BasicBlock::Create(C, "entry", F));
+  BasicBlock *Header(BasicBlock::Create(C, "header", F));
+  BasicBlock *Body(BasicBlock::Create(C, "body", F));
+  BasicBlock *Exit(BasicBlock::Create(C, "exit", F));
+  B.SetInsertPoint(Entry);
+  BranchInst::Create(Header, Entry);
+  B.SetInsertPoint(Header);
+  B.CreateStore(B.getInt8(16), PointerArg);
+  B.CreateCondBr(B.getTrue(), Exit, Body);
+  B.SetInsertPoint(Body);
+  B.CreateStore(B.getInt8(16), PointerArg);
+  BranchInst::Create(Exit, Body);
+  B.SetInsertPoint(Exit);
+  StoreInst *S1 = B.CreateStore(B.getInt8(16), PointerArg);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+  std::unique_ptr<MemorySSAUpdater> MSSAU =
+      make_unique<MemorySSAUpdater>(&MSSA);
+
+  MemoryPhi *Phi = MSSA.getMemoryAccess(Exit);
+  EXPECT_EQ(Phi, Walker->getClobberingMemoryAccess(S1));
+
+  // Alter CFG, add edge: entry -> exit.
+  Entry->getTerminator()->eraseFromParent();
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), Header, Exit);
+  SmallVector<CFGUpdate, 1> Updates;
+  Updates.push_back({cfg::UpdateKind::Insert, Entry, Exit});
+  Analyses->DT.applyUpdates(Updates);
+  MSSAU->applyInsertUpdates(Updates, Analyses->DT);
+  EXPECT_EQ(Phi, Walker->getClobberingMemoryAccess(S1));
+}
+
+// entry
+//   |
+// header
+//  /  \
+// body |
+//  \  /
+//  exit
+// header:
+//  ; 1 = MemoryDef(liveOnEntry)
+// body:
+//  ; 2 = MemoryDef(1)
+// exit:
+//  ; 3 = MemoryPhi({body, 2}, {header, 1})
+//  ; 4 = MemoryDef(3); optimize this to 1 now; the added edge should
+//  invalidate the optimized access.
+// Insert edge: entry -> exit; check that the MemorySSA update is correct.
+TEST_F(MemorySSATest, TestAddedEdgeToBlockWithPhiOpt) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  Argument *PointerArg = &*F->arg_begin();
+  Type *Int8 = Type::getInt8Ty(C);
+  BasicBlock *Entry(BasicBlock::Create(C, "entry", F));
+  BasicBlock *Header(BasicBlock::Create(C, "header", F));
+  BasicBlock *Body(BasicBlock::Create(C, "body", F));
+  BasicBlock *Exit(BasicBlock::Create(C, "exit", F));
+
+  B.SetInsertPoint(Entry);
+  Value *Alloca = B.CreateAlloca(Int8, ConstantInt::get(Int8, 1), "A");
+  BranchInst::Create(Header, Entry);
+
+  B.SetInsertPoint(Header);
+  StoreInst *S1 = B.CreateStore(B.getInt8(16), PointerArg);
+  B.CreateCondBr(B.getTrue(), Exit, Body);
+
+  B.SetInsertPoint(Body);
+  B.CreateStore(ConstantInt::get(Int8, 0), Alloca);
+  BranchInst::Create(Exit, Body);
+
+  B.SetInsertPoint(Exit);
+  StoreInst *S2 = B.CreateStore(B.getInt8(16), PointerArg);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  MemorySSAWalker *Walker = Analyses->Walker;
+  std::unique_ptr<MemorySSAUpdater> MSSAU =
+      make_unique<MemorySSAUpdater>(&MSSA);
+
+  MemoryDef *DefS1 = cast<MemoryDef>(MSSA.getMemoryAccess(S1));
+  EXPECT_EQ(DefS1, Walker->getClobberingMemoryAccess(S2));
+
+  // Alter CFG, add edge: entry -> exit.
+  Entry->getTerminator()->eraseFromParent();
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), Header, Exit);
+  SmallVector<CFGUpdate, 1> Updates;
+  Updates.push_back({cfg::UpdateKind::Insert, Entry, Exit});
+  Analyses->DT.applyUpdates(Updates);
+  MSSAU->applyInsertUpdates(Updates, Analyses->DT);
+
+  MemoryPhi *Phi = MSSA.getMemoryAccess(Exit);
+  EXPECT_EQ(Phi, Walker->getClobberingMemoryAccess(S2));
+}
+
+// entry
+//  /  |
+// a   |
+// /\  |
+// b c f
+// \/  |
+//  d  |
+//   \ /
+//    e
+// f:
+//  ; 1 = MemoryDef(liveOnEntry)
+// e:
+//  ; 2 = MemoryPhi({d, liveOnEntry}, {f, 1})
+//
+// Insert edge: f -> c; check that the update is correct.
+// After update:
+// f:
+//  ; 1 = MemoryDef(liveOnEntry)
+// c:
+//  ; 3 = MemoryPhi({a, liveOnEntry}, {f, 1})
+// d:
+//  ; 4 = MemoryPhi({b, liveOnEntry}, {c, 3})
+// e:
+//  ; 2 = MemoryPhi({d, 4}, {f, 1})
+TEST_F(MemorySSATest, TestAddedEdgeToBlockWithNoPhiAddNewPhis) {
+  F = Function::Create(
+      FunctionType::get(B.getVoidTy(), {B.getInt8PtrTy()}, false),
+      GlobalValue::ExternalLinkage, "F", &M);
+  Argument *PointerArg = &*F->arg_begin();
+  BasicBlock *Entry(BasicBlock::Create(C, "entry", F));
+  BasicBlock *ABlock(BasicBlock::Create(C, "a", F));
+  BasicBlock *BBlock(BasicBlock::Create(C, "b", F));
+  BasicBlock *CBlock(BasicBlock::Create(C, "c", F));
+  BasicBlock *DBlock(BasicBlock::Create(C, "d", F));
+  BasicBlock *EBlock(BasicBlock::Create(C, "e", F));
+  BasicBlock *FBlock(BasicBlock::Create(C, "f", F));
+
+  B.SetInsertPoint(Entry);
+  B.CreateCondBr(B.getTrue(), ABlock, FBlock);
+  B.SetInsertPoint(ABlock);
+  B.CreateCondBr(B.getTrue(), BBlock, CBlock);
+  B.SetInsertPoint(BBlock);
+  BranchInst::Create(DBlock, BBlock);
+  B.SetInsertPoint(CBlock);
+  BranchInst::Create(DBlock, CBlock);
+  B.SetInsertPoint(DBlock);
+  BranchInst::Create(EBlock, DBlock);
+  B.SetInsertPoint(FBlock);
+  B.CreateStore(B.getInt8(16), PointerArg);
+  BranchInst::Create(EBlock, FBlock);
+
+  setupAnalyses();
+  MemorySSA &MSSA = *Analyses->MSSA;
+  std::unique_ptr<MemorySSAUpdater> MSSAU =
+      make_unique<MemorySSAUpdater>(&MSSA);
+
+  // Alter CFG, add edge: f -> c.
+  FBlock->getTerminator()->eraseFromParent();
+  B.SetInsertPoint(FBlock);
+  B.CreateCondBr(B.getTrue(), CBlock, EBlock);
+  SmallVector<CFGUpdate, 1> Updates;
+  Updates.push_back({cfg::UpdateKind::Insert, FBlock, CBlock});
+  Analyses->DT.applyUpdates(Updates);
+  MSSAU->applyInsertUpdates(Updates, Analyses->DT);
+
+  MemoryPhi *MPC = MSSA.getMemoryAccess(CBlock);
+  EXPECT_NE(MPC, nullptr);
+  MemoryPhi *MPD = MSSA.getMemoryAccess(DBlock);
+  EXPECT_NE(MPD, nullptr);
+  MemoryPhi *MPE = MSSA.getMemoryAccess(EBlock);
+  EXPECT_EQ(MPD, MPE->getIncomingValueForBlock(DBlock));
+}
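Note: when extending these tests, MemorySSA's verifier can be run after the
update to catch inconsistencies beyond the explicit EXPECT checks (sketch,
reusing the fixture's MSSA reference and the Updates vector from above):

    MSSAU->applyInsertUpdates(Updates, Analyses->DT);
    MSSA.verifyMemorySSA(); // Checks domination, def-use and phi invariants.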