Index: llvm/trunk/include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/MemorySSA.h
+++ llvm/trunk/include/llvm/Transforms/Utils/MemorySSA.h
@@ -110,6 +110,11 @@
 class MemoryAccess;
 class LLVMContext;
 class raw_ostream;
 
+enum {
+  // Used to signify what the default invalid ID is for MemoryAccess's
+  // getID()
+  INVALID_MEMORYACCESS_ID = 0
+};
 template <class T> class memoryaccess_def_iterator_base;
 using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>;
@@ -157,7 +162,8 @@
   friend class MemoryDef;
   friend class MemoryPhi;
 
-  /// \brief Used internally to give IDs to MemoryAccesses for printing
+  /// \brief Used for debugging and tracking things about MemoryAccesses.
+  /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
   virtual unsigned getID() const = 0;
 
   MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB,
@@ -235,7 +241,7 @@
   void *operator new(size_t s) { return User::operator new(s, 1); }
 
   MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
-      : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB) {}
+      : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {}
 
   static inline bool classof(const MemoryUse *) { return true; }
   static inline bool classof(const Value *MA) {
@@ -243,6 +249,18 @@
   }
 
   void print(raw_ostream &OS) const override;
+  void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) {
+    if (Optimized)
+      OptimizedID = DMA->getID();
+    MemoryUseOrDef::setDefiningAccess(DMA);
+  }
+  bool isOptimized() const {
+    return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
+  }
+  /// \brief Reset the ID of what this MemoryUse was optimized to, causing it
+  /// to be rewalked by the walker if necessary.
+  /// This really should only be called by tests.
+  void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; }
 
 protected:
   friend class MemorySSA;
@@ -250,6 +268,9 @@
   unsigned getID() const override {
     llvm_unreachable("MemoryUses do not have IDs");
   }
+
+private:
+  unsigned int OptimizedID;
 };
 
 template <>
@@ -288,8 +309,6 @@
 protected:
   friend class MemorySSA;
 
-  // For debugging only. This gets used to give memory accesses pretty numbers
-  // when printing them out
   unsigned getID() const override { return ID; }
 
 private:
@@ -446,8 +465,6 @@
     User::allocHungoffUses(N, /* IsPhi */ true);
   }
 
-  /// For debugging only. This gets used to give memory accesses pretty numbers
-  /// when printing them out
   unsigned getID() const final { return ID; }
 
 private:
Index: llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp
+++ llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp
@@ -1229,7 +1229,7 @@
 
 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
     : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
-      NextID(0) {
+      NextID(INVALID_MEMORYACCESS_ID) {
   buildMemorySSA();
 }
@@ -1332,7 +1332,7 @@
   }
 
   if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
-    MU->setDefiningAccess(MSSA->getLiveOnEntryDef());
+    MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
     continue;
   }
@@ -1428,13 +1428,13 @@
    // At the end of this loop, UpperBound is either a clobber, or the lower
    // bound; PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
    if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
-      MU->setDefiningAccess(VersionStack[UpperBound]);
+      MU->setDefiningAccess(VersionStack[UpperBound], true);
       // We were last killed by where we got to
       LocInfo.LastKill = UpperBound;
     } else {
       // Otherwise, we checked all the new ones, and now we know we can get to
       // LastKill.
-      MU->setDefiningAccess(VersionStack[LocInfo.LastKill]);
+      MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true);
     }
     LocInfo.LowerBound = VersionStack.size() - 1;
     LocInfo.LowerBoundBlock = BB;
@@ -1754,8 +1754,27 @@
   }
 
   // Re-point the uses at our defining access
-  if (!MA->use_empty())
-    MA->replaceAllUsesWith(NewDefTarget);
+  if (!MA->use_empty()) {
+    // Reset optimized on users of this store, and reset the uses.
+    // A few notes:
+    // 1. This is a slightly modified version of RAUW to avoid walking the
+    // uses twice here.
+    // 2. If we wanted to be complete, we would have to reset the optimized
+    // flags on users of phi nodes if doing the below makes a phi node have all
+    // the same arguments. Instead, we prefer that users call removeMemoryAccess
+    // on those phi nodes themselves, because doing it here would be N^3.
+    if (MA->hasValueHandle())
+      ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget);
+    // Note: We assume MemorySSA is not used in metadata since it's not really
+    // part of the IR.
+
+    while (!MA->use_empty()) {
+      Use &U = *MA->use_begin();
+      if (MemoryUse *MU = dyn_cast<MemoryUse>(U.getUser()))
+        MU->resetOptimized();
+      U.set(NewDefTarget);
+    }
+  }
 
   // The call below to erase will destroy MA, so we can't change the order we
   // are doing things here
@@ -2115,6 +2134,7 @@
   if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
     UpwardsMemoryQuery Q(MU->getMemoryInst(), MU);
     Cache.remove(MU, Q.StartingLoc, Q.IsCall);
+    MU->resetOptimized();
   } else {
     // If it is not a use, the best we can do right now is destroy the cache.
     Cache.clear();
@@ -2188,6 +2208,13 @@
   if (!StartingAccess)
     return MA;
 
+  // If this is an already optimized use or def, return the optimized result.
+  // Note: Currently, we do not store the optimized def result because we'd
+  // need a separate field, since we can't use it as the defining access.
+  if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
+    if (MU->isOptimized())
+      return MU->getDefiningAccess();
+
   const Instruction *I = StartingAccess->getMemoryInst();
   UpwardsMemoryQuery Q(I, StartingAccess);
   // We can't sanely do anything with fences; they conservatively
@@ -2202,6 +2229,8 @@
   if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
     MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
     Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall);
+    if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
+      MU->setDefiningAccess(LiveOnEntry, true);
     return LiveOnEntry;
   }
@@ -2218,6 +2247,8 @@
   DEBUG(dbgs() << *DefiningAccess << "\n");
   DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
   DEBUG(dbgs() << *Result << "\n");
+  if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
+    MU->setDefiningAccess(Result, true);
 
   return Result;
 }
Index: llvm/trunk/unittests/Transforms/Utils/MemorySSA.cpp
===================================================================
--- llvm/trunk/unittests/Transforms/Utils/MemorySSA.cpp
+++ llvm/trunk/unittests/Transforms/Utils/MemorySSA.cpp
@@ -225,9 +225,15 @@
   // but we should now get live on entry for the clobbering definition of the
   // load, since it will walk past the phi node because every argument is the
   // same.
+  // XXX: This currently requires either removing the phi or resetting
+  // optimized on the load
+
+  EXPECT_FALSE(
+      MSSA.isLiveOnEntryDef(Walker->getClobberingMemoryAccess(LoadInst)));
+  // If we reset optimized, we get live on entry.
+  LoadAccess->resetOptimized();
   EXPECT_TRUE(
       MSSA.isLiveOnEntryDef(Walker->getClobberingMemoryAccess(LoadInst)));
-
   // The phi should now be a two entry phi with two live on entry defs.
   for (const auto &Op : DefiningAccess->operands()) {
     MemoryAccess *Operand = cast<MemoryAccess>(&*Op);
@@ -450,24 +456,33 @@
   EXPECT_EQ(LoadClobber, MSSA.getLiveOnEntryDef());
 }
 
-// At one point, we were building MSSA with 0 AA passes. This ensures that we
-// actually use BasicAA.
-TEST_F(MemorySSATest, AAIsPresentAtBuildTime) {
+// Test that loads get reoptimized properly by the walker.
+TEST_F(MemorySSATest, WalkerReopt) {
   F = Function::Create(FunctionType::get(B.getVoidTy(), {}, false),
                        GlobalValue::ExternalLinkage, "F", &M);
   B.SetInsertPoint(BasicBlock::Create(C, "", F));
-
   Type *Int8 = Type::getInt8Ty(C);
-  Constant *One = ConstantInt::get(Int8, 1);
-  Value *AllocaA = B.CreateAlloca(Int8);
-  Instruction *StoreA = B.CreateStore(One, AllocaA);
-
-  Value *AllocaB = B.CreateAlloca(Int8);
-  B.CreateStore(One, AllocaB);
-  Instruction *LoadA = B.CreateLoad(AllocaA);
+  Value *AllocaA = B.CreateAlloca(Int8, ConstantInt::get(Int8, 1), "A");
+  Instruction *SIA = B.CreateStore(ConstantInt::get(Int8, 0), AllocaA);
+  Value *AllocaB = B.CreateAlloca(Int8, ConstantInt::get(Int8, 1), "B");
+  Instruction *SIB = B.CreateStore(ConstantInt::get(Int8, 0), AllocaB);
+  Instruction *LIA = B.CreateLoad(AllocaA);
 
   setupAnalyses();
   MemorySSA &MSSA = *Analyses->MSSA;
-  auto *MU = cast<MemoryUse>(MSSA.getMemoryAccess(LoadA));
-  EXPECT_EQ(MU->getDefiningAccess(), MSSA.getMemoryAccess(StoreA));
+  MemorySSAWalker *Walker = Analyses->Walker;
+
+  MemoryAccess *LoadClobber = Walker->getClobberingMemoryAccess(LIA);
+  MemoryUse *LoadAccess = cast<MemoryUse>(MSSA.getMemoryAccess(LIA));
+  EXPECT_EQ(LoadClobber, MSSA.getMemoryAccess(SIA));
+  EXPECT_TRUE(MSSA.isLiveOnEntryDef(Walker->getClobberingMemoryAccess(SIA)));
+  MSSA.removeMemoryAccess(LoadAccess);
+
+  // Create the load memory access again, pointing at an unoptimized place.
+  MemoryUse *NewLoadAccess = cast<MemoryUse>(MSSA.createMemoryAccessInBB(
+      LIA, MSSA.getMemoryAccess(SIB), LIA->getParent(), MemorySSA::End));
+  // This should cause it to be optimized.
+  EXPECT_EQ(Walker->getClobberingMemoryAccess(NewLoadAccess), LoadClobber);
+  EXPECT_EQ(NewLoadAccess->getDefiningAccess(), LoadClobber);
 }
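Usage note (not part of the patch): the sketch below shows how the caching
surface added here is intended to behave, based on the header changes and the
WalkerReopt test above. It is a minimal sketch, not a definitive recipe; the
function name queryAndInvalidate and the setup around it are illustrative, and
it assumes MemorySSA has been built for the function containing Load and that
Walker is the walker associated with it (e.g. the one MemorySSA hands out).

    #include "llvm/Transforms/Utils/MemorySSA.h"
    #include <cassert>
    using namespace llvm;

    // Illustrative helper, not from the patch.
    void queryAndInvalidate(MemorySSA &MSSA, MemorySSAWalker *Walker,
                            Instruction *Load) {
      // First query: the walker walks upward to the clobber and stores it on
      // the MemoryUse via setDefiningAccess(Result, /*Optimized=*/true).
      MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(Load);

      auto *MU = cast<MemoryUse>(MSSA.getMemoryAccess(Load));
      assert(MU->isOptimized() && "walker should have cached the clobber");

      // Second query: isOptimized() is true, so the walker returns the
      // defining access directly instead of re-walking.
      MemoryAccess *Cached = Walker->getClobberingMemoryAccess(Load);
      assert(Cached == Clobber && "cached result should match the walk");

      // After a transformation that may invalidate the cached clobber, drop
      // the stored ID; the next query re-walks. The patch's comment says this
      // really should only be called by tests, but removeMemoryAccess relies
      // on the same mechanism internally.
      MU->resetOptimized();
      (void)Clobber;
      (void)Cached;
    }

The design choice visible in the patch is that a MemoryUse is "optimized" when
its stored OptimizedID matches the ID of its current defining access, so
redirecting the use to a different defining access implicitly invalidates the
cache without any extra bookkeeping.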