Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -45,7 +45,9 @@
 class LoadInst;
 class LoopInfo;
 class MemDepResult;
+class MemoryAccess;
 class MemoryDependenceResults;
+class MemoryLocation;
 class MemorySSA;
 class MemorySSAUpdater;
 class NonLocalDepResult;
@@ -230,6 +232,7 @@
   OptimizationRemarkEmitter *ORE = nullptr;
   ImplicitControlFlowTracking *ICF = nullptr;
   LoopInfo *LI = nullptr;
+  AAResults *AA = nullptr;
   MemorySSAUpdater *MSSAU = nullptr;
   ValueTable VN;
@@ -316,21 +319,67 @@
   // List of critical edges to be split between iterations.
   SmallVector<std::pair<Instruction *, unsigned>, 4> toSplit;
+  enum class AvailabilityKind {
+    // Unavailable value
+    Other = 0,
+
+    // Exactly overlapping locations.
+    Def,
+
+    // Available value is a superset of the needed bits.
+    Clobber,
+  };
+  struct ReachingMemoryValue {
+    AvailabilityKind Kind;
+    BasicBlock *Block;
+    const Value *Addr;
+    Instruction *Inst;
+    int32_t Offset;
+
+    static ReachingMemoryValue getUnknown(BasicBlock *BB,
+                                          const Value *Addr = nullptr,
+                                          Instruction *Inst = nullptr) {
+      return {AvailabilityKind::Other, BB, Addr, Inst, -1};
+    }
+
+    static ReachingMemoryValue getDef(const Value *Addr, Instruction *Inst) {
+      return {AvailabilityKind::Def, Inst->getParent(), Addr, Inst, -1};
+    }
+
+    static ReachingMemoryValue getClobber(const Value *Addr, Instruction *Inst,
+                                          int32_t Offset = -1) {
+      return {AvailabilityKind::Clobber, Inst->getParent(), Addr, Inst, Offset};
+    }
+
+    //protected:
+    ReachingMemoryValue() = default;
+  };
+
+  Optional<ReachingMemoryValue> findReachingValueForLoadInBlock(
+      const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
+      Instruction *DomLower, Instruction *DomUpper, MemoryAccess *ClobberMA,
+      MemorySSA &MSSA, AAResults &AA);
+
+  void findReachingValuesForLoad(LoadInst *Inst, MemorySSA &MSSA, AAResults &AA,
+                                 SmallVectorImpl<ReachingMemoryValue> &Values);
+
+
   // Helper functions of redundant load elimination
   bool processLoad(LoadInst *L);
-  bool processNonLocalLoad(LoadInst *L);
+  bool processNonLocalLoad(LoadInst *L, SmallVectorImpl<ReachingMemoryValue> &);
   bool processAssumeIntrinsic(AssumeInst *II);

   /// Given a local dependency (Def or Clobber) determine if a value is
   /// available for the load. Returns true if an value is known to be
   /// available and populates Res. Returns false otherwise.
-  bool AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
-                               Value *Address, gvn::AvailableValue &Res);
+  bool AnalyzeLoadAvailability(LoadInst *Load, AvailabilityKind AvailKind,
+                               Instruction *DepInst, int Offset, Value *Address,
+                               gvn::AvailableValue &Res);

   /// Given a list of non-local dependencies, determine if a value is
   /// available for the load in each specified block. If it is, add it to
   /// ValuesPerBlock. If not, add it to UnavailableBlocks.
-  void AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
+  void AnalyzeLoadAvailability(LoadInst *Load, SmallVectorImpl<ReachingMemoryValue> &Values,
                                AvailValInBlkVect &ValuesPerBlock,
                                UnavailBlkVect &UnavailableBlocks);
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -943,7 +943,7 @@
 /// Try to locate the three instruction involved in a missed
 /// load-elimination case that is due to an intervening store.
-static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
+static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst,
                                    DominatorTree *DT,
                                    OptimizationRemarkEmitter *ORE) {
   using namespace ore;
@@ -998,21 +998,24 @@
   if (OtherAccess)
     R << " in favor of " << NV("OtherAccess", OtherAccess);

-  R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
+  R << " because it is clobbered by " << NV("ClobberedBy", DepInst);

   ORE->emit(R);
 }

-bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
-                                      Value *Address, AvailableValue &Res) {
-  assert((DepInfo.isDef() || DepInfo.isClobber()) &&
+bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load,
+                                      AvailabilityKind AvailKind,
+                                      Instruction *DepInst,
+                                      int Offset, Value *Address,
+                                      AvailableValue &Res) {
+  assert((AvailKind == AvailabilityKind::Def ||
+          AvailKind == AvailabilityKind::Clobber) &&
          "expected a local dependence");
   assert(Load->isUnordered() && "rules below are incorrect for ordered access");

   const DataLayout &DL = Load->getModule()->getDataLayout();

-  Instruction *DepInst = DepInfo.getInst();
-  if (DepInfo.isClobber()) {
+  if (AvailKind == AvailabilityKind::Clobber) {
     // If the dependence is to a store that writes to a superset of the bits
     // read by the load, we can extract the bits we need for the load from the
     // stored value.
@@ -1039,17 +1042,12 @@
     if (DepLoad != Load && Address &&
         Load->isAtomic() <= DepLoad->isAtomic()) {
       Type *LoadType = Load->getType();
-      int Offset = -1;
       // If MD reported clobber, check it was nested.
-      if (DepInfo.isClobber() &&
-          canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) {
-        const auto ClobberOff = MD->getClobberOffset(DepLoad);
-        // GVN has no deal with a negative offset.
-        Offset = (ClobberOff == None || ClobberOff.getValue() < 0)
-                     ? -1
-                     : ClobberOff.getValue();
-      }
+      if (AvailKind != AvailabilityKind::Clobber ||
+          !canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL) ||
+          Offset < 0)
+        Offset = -1;
       if (Offset == -1)
         Offset = analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL);
@@ -1078,11 +1076,11 @@
       dbgs() << "GVN: load "; Load->printAsOperand(dbgs());
       dbgs() << " is clobbered by " << *DepInst << '\n';);
     if (ORE->allowExtraAnalysis(DEBUG_TYPE))
-      reportMayClobberedLoad(Load, DepInfo, DT, ORE);
+      reportMayClobberedLoad(Load, DepInst, DT, ORE);

     return false;
   }
-  assert(DepInfo.isDef() && "follows from above");
+  assert(AvailKind == AvailabilityKind::Def && "follows from above");

   // Loading the allocation -> undef.
   if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
@@ -1138,48 +1136,48 @@
   return false;
 }

-void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
-                                      AvailValInBlkVect &ValuesPerBlock,
-                                      UnavailBlkVect &UnavailableBlocks) {
+void GVNPass::AnalyzeLoadAvailability(
+    LoadInst *Load, SmallVectorImpl<ReachingMemoryValue> &Values,
+    AvailValInBlkVect &ValuesPerBlock, UnavailBlkVect &UnavailableBlocks) {
   // Filter out useless results (non-locals, etc). Keep track of the blocks
   // where we have a value available in repl, also keep track of whether we see
   // dependencies that produce an unknown value for the load (such as a call
   // that could potentially clobber the load).
-  unsigned NumDeps = Deps.size();
-  for (unsigned i = 0, e = NumDeps; i != e; ++i) {
-    BasicBlock *DepBB = Deps[i].getBB();
-    MemDepResult DepInfo = Deps[i].getResult();
+  unsigned NumValues = Values.size();
+  for (unsigned I = 0, E = NumValues; I != E; ++I) {
+    const ReachingMemoryValue &MemVal = Values[I];

-    if (DeadBlocks.count(DepBB)) {
+    if (DeadBlocks.count(MemVal.Block)) {
       // Dead dependent mem-op disguise as a load evaluating the same value
       // as the load in question.
-      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB));
+      ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(MemVal.Block));
       continue;
     }

-    if (!DepInfo.isDef() && !DepInfo.isClobber()) {
-      UnavailableBlocks.push_back(DepBB);
+    if (MemVal.Kind == AvailabilityKind::Other) {
+      UnavailableBlocks.push_back(MemVal.Block);
       continue;
     }

     // The address being loaded in this non-local block may not be the same as
     // the pointer operand of the load if PHI translation occurs. Make sure
-    // to consider the right address.
-    Value *Address = Deps[i].getAddress();
+    // to consider the right address. FIXME(chill): move the comment to a more
+    // appropriate place.

     AvailableValue AV;
-    if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) {
+    if (AnalyzeLoadAvailability(Load, MemVal.Kind, MemVal.Inst, MemVal.Offset,
+                                const_cast<Value *>(MemVal.Addr), AV)) {
       // subtlety: because we know this was a non-local dependency, we know
       // it's safe to materialize anywhere between the instruction within
       // DepInfo and the end of it's block.
-      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
-                                                          std::move(AV)));
+      ValuesPerBlock.push_back(
+          AvailableValueInBlock::get(MemVal.Block, std::move(AV)));
     } else {
-      UnavailableBlocks.push_back(DepBB);
+      UnavailableBlocks.push_back(MemVal.Block);
     }
   }

-  assert(NumDeps == ValuesPerBlock.size() + UnavailableBlocks.size() &&
+  assert(Values.size() == ValuesPerBlock.size() + UnavailableBlocks.size() &&
          "post condition violation");
 }

@@ -1591,7 +1589,8 @@
 /// Attempt to eliminate a load whose dependencies are
 /// non-local by performing PHI construction.
-bool GVNPass::processNonLocalLoad(LoadInst *Load) {
+bool GVNPass::processNonLocalLoad(
+    LoadInst *Load, SmallVectorImpl<ReachingMemoryValue> &Values) {
   // non-local speculations are not allowed under asan.
   if (Load->getParent()->getParent()->hasFnAttribute(
           Attribute::SanitizeAddress) ||
@@ -1599,25 +1598,11 @@
           Attribute::SanitizeHWAddress))
     return false;

-  // Step 1: Find the non-local dependencies of the load.
-  LoadDepVect Deps;
-  MD->getNonLocalPointerDependency(Load, Deps);
-
   // If we had to process more than one hundred blocks to find the
   // dependencies, this load isn't worth worrying about. Optimizing
   // it will be too expensive.
-  unsigned NumDeps = Deps.size();
-  if (NumDeps > MaxNumDeps)
-    return false;
-
-  // If we had a phi translation failure, we'll have a single entry which is a
-  // clobber in the current block. Reject this early.
-  if (NumDeps == 1 &&
-      !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
-    LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs());
-               dbgs() << " has unknown dependencies\n";);
+  if (Values.size() > MaxNumDeps)
     return false;
-  }

   bool Changed = false;
   // If this load follows a GEP, see if we can PRE the indices before analyzing.
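A stand-alone illustrative C++ sketch, not part of the patch: it models only the classification that the new reaching-value analysis hands to the reworked AnalyzeLoadAvailability, using simplified stand-in types for AvailabilityKind, ReachingMemoryValue and AliasResult (the real code additionally consults MemorySSA and AliasAnalysis). A must-alias becomes a Def, a partial overlap with a known non-negative offset becomes a usable Clobber, and everything else is treated as not reusable.

#include <cassert>
#include <cstdint>
#include <optional>

enum class AvailabilityKind { Other, Def, Clobber };

struct ReachingMemoryValue {
  AvailabilityKind Kind;
  int32_t Offset; // byte offset into the wider access, -1 when not applicable

  static ReachingMemoryValue getUnknown() { return {AvailabilityKind::Other, -1}; }
  static ReachingMemoryValue getDef() { return {AvailabilityKind::Def, -1}; }
  static ReachingMemoryValue getClobber(int32_t Off = -1) {
    return {AvailabilityKind::Clobber, Off};
  }
};

// Stand-in for the alias-query result; only the states the sketch needs.
enum class Alias { No, May, Partial, Must };

// Mirrors the per-candidate decision: exact overlap -> Def, partial overlap
// with a known non-negative offset -> Clobber, otherwise not reusable.
static ReachingMemoryValue classify(Alias AR, std::optional<int32_t> PartialOffset) {
  if (AR == Alias::Must)
    return ReachingMemoryValue::getDef();
  if (AR == Alias::Partial && PartialOffset && *PartialOffset >= 0)
    return ReachingMemoryValue::getClobber(*PartialOffset);
  return ReachingMemoryValue::getUnknown();
}

int main() {
  assert(classify(Alias::Must, std::nullopt).Kind == AvailabilityKind::Def);
  assert(classify(Alias::Partial, 4).Kind == AvailabilityKind::Clobber);
  assert(classify(Alias::Partial, 4).Offset == 4);
  assert(classify(Alias::May, std::nullopt).Kind == AvailabilityKind::Other);
  return 0;
}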
@@ -1628,17 +1613,17 @@
     Changed |= performScalarPRE(I);
   }

-  // Step 2: Analyze the availability of the load
+  // Step 1: Analyze the availability of the load
   AvailValInBlkVect ValuesPerBlock;
   UnavailBlkVect UnavailableBlocks;
-  AnalyzeLoadAvailability(Load, Deps, ValuesPerBlock, UnavailableBlocks);
+  AnalyzeLoadAvailability(Load, Values, ValuesPerBlock, UnavailableBlocks);

   // If we have no predecessors that produce a known value for this load, exit
   // early.
   if (ValuesPerBlock.empty())
     return Changed;

-  // Step 3: Eliminate fully redundancy.
+  // Step 2: Eliminate full redundancy.
   //
   // If all of the instructions we depend on produce a known value for this
   // load, then it is fully redundant and we can use PHI insertion to compute
@@ -1666,7 +1651,7 @@
     return true;
   }

-  // Step 4: Eliminate partial redundancy.
+  // Step 3: Eliminate partial redundancy.
   if (!isPREEnabled() || !isLoadPREEnabled())
     return Changed;
   if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
@@ -1882,6 +1867,405 @@
   I->replaceAllUsesWith(Repl);
 }

+Optional<ReachingMemoryValue> GVNPass::findReachingValueForLoadInBlock(
+    const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock *BB,
+    Instruction *DomLower, Instruction *DomUpper, MemoryAccess *ClobberMA,
+    MemorySSA &MSSA, AAResults &AA) {
+
+  auto updateChoice = [&](ReachingMemoryValue &Choice, AliasResult &AR,
+                          Instruction *Candidate) {
+    // TODO: Worth choosing between exact or partial overlap?
+    if (Choice.Kind == AvailabilityKind::Other)
+      Choice.Inst = Candidate;
+    else if (MSSA.locallyDominates(MSSA.getMemoryAccess(Choice.Inst),
+                                   MSSA.getMemoryAccess(Candidate)))
+      Choice.Inst = Candidate;
+    else
+      return;
+
+    if (AR == AliasResult::PartialAlias) {
+      Choice.Kind = AvailabilityKind::Clobber;
+      Choice.Offset = AR.getOffset();
+    } else {
+      Choice.Kind = AvailabilityKind::Def;
+      Choice.Offset = -1;
+    }
+    Choice.Block = Candidate->getParent();
+  };
+
+  // Lower bound is inclusive, upper bound is exclusive.
+  auto isBetweenBounds = [&](const MemoryUseOrDef *U) {
+    if (DomLower == nullptr && DomUpper == nullptr)
+      return true;
+    MemoryAccess *Lower =
+        DomLower == nullptr ? nullptr : MSSA.getMemoryAccess(DomLower);
+    if (Lower != nullptr && !MSSA.locallyDominates(Lower, U))
+      return false;
+    MemoryAccess *Upper =
+        DomUpper == nullptr ? nullptr : MSSA.getMemoryAccess(DomUpper);
+    return Upper == nullptr || (U != Upper && MSSA.locallyDominates(U, Upper));
+  };
+
+  // For all the users of the clobbering access.
+  auto ReachingVal = ReachingMemoryValue::getUnknown(BB, Loc.Ptr);
+  const auto *MemAccessList = MSSA.getBlockAccesses(BB);
+  if (MemAccessList == nullptr)
+    return None;
+  for (const MemoryAccess &MA : *MemAccessList) {
+    auto *UseOrDef = dyn_cast<MemoryUseOrDef>(&MA);
+    // Invariant loads have `liveOnEntry` as their clobbering access in
+    // MemorySSA, in which case we walk over all memory accesses in the block.
+    if (UseOrDef == nullptr ||
+        (!IsInvariantLoad && UseOrDef->getDefiningAccess() != ClobberMA))
+      continue;
+
+    // We are interested only in loads here and, in the case of an invariant
+    // load, in stores as well.
+    Instruction *M = UseOrDef->getMemoryInst();
+    auto *L = dyn_cast<LoadInst>(M);
+    if (!IsInvariantLoad && L == nullptr)
+      continue;
+    auto *S = dyn_cast<StoreInst>(M);
+    // If it's not a load or a store, it cannot give us a useful value for
+    // eliminating the load.
+    if (L == nullptr && S == nullptr)
+      continue;
+
+    // Skip if the use is not within the bounds.
+    if (!isBetweenBounds(UseOrDef))
+      continue;
+
+    AliasResult AR = AA.alias(MemoryLocation::get(M), Loc);
+    // If the locations do not certainly alias, we cannot infer that the
+    // candidate load reads the same value.
+    if (AR == AliasResult::NoAlias || AR == AliasResult::MayAlias)
+      continue;
+
+    // Locations partially overlap, but neither is a subset of the other, or
+    // the second location is before the first.
+    if (AR == AliasResult::PartialAlias &&
+        (!AR.hasOffset() || AR.getOffset() < 0))
+      continue;
+
+    // Locations precisely overlap or the second accesses a subset of the bits
+    // of the first.
+    updateChoice(ReachingVal, AR, M);
+  }
+
+  // Found something.
+  if (ReachingVal.Kind != AvailabilityKind::Other)
+    return ReachingVal;
+
+  // If the clobbering access is the entry memory state, continue the search
+  // into predecessors, unless the load is from a local object in which case
+  // return the allocation instruction.
+  if (MSSA.isLiveOnEntryDef(ClobberMA)) {
+    auto *Alloc = dyn_cast<AllocaInst>(getUnderlyingObject(Loc.Ptr));
+    if (Alloc != nullptr && Alloc->getParent() == BB)
+      return ReachingMemoryValue::getDef(Loc.Ptr,
+                                         const_cast<AllocaInst *>(Alloc));
+
+    return None;
+  }
+
+  // If the clobbering access is a MemoryPhi or in another block, go to
+  // predecessors.
+  if (ClobberMA->getBlock() != BB || isa<MemoryPhi>(ClobberMA))
+    return None;
+
+  Instruction *ClobberInst = cast<MemoryDef>(ClobberMA)->getMemoryInst();
+  auto getOrdering = [](const Instruction *I) {
+    assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+    if (const auto *L = dyn_cast<LoadInst>(I))
+      return L->getOrdering();
+    return cast<StoreInst>(I)->getOrdering();
+  };
+
+  // Check if the clobbering access is a load or a store that we can reuse.
+  if (isa<LoadInst>(ClobberInst) || isa<StoreInst>(ClobberInst)) {
+    AliasResult AR = AA.alias(MemoryLocation::get(ClobberInst), Loc);
+    if (AR == AliasResult::MustAlias)
+      return ReachingMemoryValue::getDef(Loc.Ptr, ClobberInst);
+
+    if (AR == AliasResult::NoAlias) {
+      // May happen with atomic/volatile load/store or MemorySSA imprecision.
+      if (!ClobberInst->isAtomic() ||
+          !isStrongerThan(getOrdering(ClobberInst), AtomicOrdering::Monotonic))
+        return None;
+      return ReachingMemoryValue::getClobber(Loc.Ptr, ClobberInst);
+    }
+
+    if (AR == AliasResult::MayAlias ||
+        (AR == AliasResult::PartialAlias &&
+         (!AR.hasOffset() || AR.getOffset() < 0)))
+      return ReachingMemoryValue::getClobber(Loc.Ptr, ClobberInst);
+
+    // The only option left is a store of a superset of the required bits.
+    assert(AR == AliasResult::PartialAlias && AR.hasOffset() &&
+           AR.getOffset() > 0 && "Follows from the conditions above");
+    return ReachingMemoryValue::getClobber(Loc.Ptr, ClobberInst,
+                                           AR.getOffset());
+  }
+
+  // If we are at a malloc-like function call, we can turn the load into `undef`
+  // or zero.
+  if (isNoAliasFn(ClobberInst, TLI)) {
+    const Value *Obj = getUnderlyingObject(Loc.Ptr);
+    if (Obj == ClobberInst || AA.isMustAlias(ClobberInst, Loc.Ptr))
+      return ReachingMemoryValue::getDef(Loc.Ptr, ClobberInst);
+  }
+
+  // Can reorder loads across a release fence.
+  if (auto *Fence = dyn_cast<FenceInst>(ClobberInst)) {
+    if (Fence->getOrdering() == AtomicOrdering::Release)
+      return None;
+  }
+
+  // See if the clobber instruction (e.g. a call) may modify the location.
+  ModRefInfo MR = AA.getModRefInfo(ClobberInst, Loc);
+  // If modification is possible, analyze deeper, to exclude accesses to
+  // non-escaping local allocations.
+  if (isModAndRefSet(MR))
+    MR = AA.callCapturesBefore(ClobberInst, Loc, DT);
+  MR = clearMust(MR);
+  if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
+    return None;
+
+  // TODO: Masked load/store?
+
+  // Conservatively return unknown value for the load.
+  return ReachingMemoryValue::getClobber(Loc.Ptr, ClobberInst);
+}
+
+static Instruction *
+findInvariantGroupValue(LoadInst *L, DominatorTree &DT) {
+  // We consider bitcasts and zero GEPs to be the same pointer value. Start by
+  // stripping bitcasts and zero GEPs, then we will recursively look at loads
+  // and stores through bitcasts and zero GEPs.
+  Value *PointerOperand = L->getPointerOperand()->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of
+  // current function.
+  if (isa<GlobalValue>(PointerOperand))
+    return nullptr;
+
+  // Queue to process all pointers that are equivalent to load operand.
+  SmallVector<Value *, 8> PointerUsesQueue;
+  PointerUsesQueue.push_back(PointerOperand);
+
+  Instruction *MostDominatingInstruction = L;
+
+  // FIXME: This loop is O(n^2) because dominates can be O(n) and in worst case
+  // we will see all the instructions.
+  while (!PointerUsesQueue.empty()) {
+    Value *Ptr = PointerUsesQueue.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (User *Us : Ptr->users()) {
+      auto *U = dyn_cast<Instruction>(Us);
+      if (!U || U == L || !DT.dominates(U, MostDominatingInstruction))
+        continue;
+
+      // Add bitcasts and zero GEPs to queue.
+      if (isa<BitCastInst>(U)) {
+        PointerUsesQueue.push_back(U);
+        continue;
+      }
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+        if (GEP->hasAllZeroIndices())
+          PointerUsesQueue.push_back(U);
+        continue;
+      }
+
+      // If we hit a load/store with an invariant.group metadata and the same
+      // pointer operand, we can assume that the value pointed to by the
+      // pointer operand didn't change.
+      if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+          getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+        MostDominatingInstruction = U;
+      }
+    }
+  }
+  return MostDominatingInstruction == L ? nullptr : MostDominatingInstruction;
+}
+
+void GVNPass::findReachingValuesForLoad(
+    LoadInst *L, MemorySSA &MSSA, AAResults &AA,
+    SmallVectorImpl<ReachingMemoryValue> &Values) {
+
+  struct WorkItem {
+    WorkItem(BasicBlock *BB, MemoryAccess *ClobberMA, const PHITransAddr &Addr,
+             Instruction *DomLower, Instruction *DomUpper)
+        : BB(BB), ClobberMA(ClobberMA), Addr(Addr), DomLower(DomLower),
+          DomUpper(DomUpper) {}
+    BasicBlock *BB;
+    MemoryAccess *ClobberMA;
+    PHITransAddr Addr;
+    Instruction *DomLower;
+    Instruction *DomUpper;
+  };
+  SmallVector<WorkItem> Worklist;
+
+  // FIXME(chill): Better comment
+  // The starting block is special in that we may need to visit it twice if
+  // it is part of a loop: once for the part between the beginning of the
+  // block and the original load instruction, and once for the part between
+  // the original load instruction and the end of the block. A second visit to
+  // the starting block is indicated by `DomLower` being the original load
+  // instruction; that is when we add the starting block to the visited set.
+  DenseMap<BasicBlock *, Value *> Visited;
+  auto markVisited = [&](const WorkItem &Item) {
+    if (Item.BB != L->getParent() || Item.DomLower == L)
+      Visited.insert({Item.BB, Item.Addr.getAddr()});
+  };
+
+  // Factor out the logic deciding whether to continue traversing a predecessor.
+  // Return `Skip` for unreachable blocks and for blocks already visited with
+  // the same (possibly phi-translated) address. Return `FailPred` on
+  // phi-translation failure to an unvisited block. Return `FailBlock` if we
+  // have come to a block already visited with a different address. Otherwise,
+  // return `OK`.
+  enum { FailBlock, FailPred, Skip, OK };
+  auto shouldTraversePredecessor = [&](BasicBlock *Pred, BasicBlock *BB,
+                                       PHITransAddr &Addr) {
+    if (!DT->isReachableFromEntry(Pred))
+      return Skip;
+    if (Addr.NeedsPHITranslationFromBlock(BB))
+      Addr.PHITranslateValue(BB, Pred, DT, false);
+    auto It = Visited.find(Pred);
+    if (It != Visited.end())
+      return It->second == Addr.getAddr() ? Skip : FailBlock;
+    return Addr.getAddr() ? OK : FailPred;
+  };
+
+  // auto &Walker = *MSSA.getSkipSelfWalker();
+  const DataLayout &DL = L->getModule()->getDataLayout();
+  auto Loc = MemoryLocation::get(L);
+  bool IsInvariantLoad = L->hasMetadata(LLVMContext::MD_invariant_load);
+  Worklist.emplace_back(
+      L->getParent(),
+      MSSA.getMemoryAccess(L)
+          ->getDefiningAccess(), // Walker.getClobberingMemoryAccess(L),
+      PHITransAddr(L->getPointerOperand(), DL, AC), nullptr, L);
+  while (!Worklist.empty()) {
+    WorkItem Item = Worklist.back();
+    Worklist.pop_back();
+
+    if (Visited.count(Item.BB))
+      continue;
+
+    assert(
+        Item.Addr.getAddr() != nullptr &&
+        "Blocks with failed phi-translation must not appear on the worklist");
+    // if (Item.Addr.getAddr() == nullptr) {
+    //   markVisited(Item);
+    //   Values.push_back(ReachingMemoryValue::getUnknown(Item.BB));
+    //   continue;
+    // }
+
+    // If we have found a definite answer (a reusable value or unknown),
+    // continue with the next block in the worklist.
+    if (Optional<ReachingMemoryValue> R = findReachingValueForLoadInBlock(
+            Loc.getWithNewPtr(Item.Addr.getAddr()), IsInvariantLoad, Item.BB,
+            Item.DomLower, Item.DomUpper, Item.ClobberMA, MSSA, AA)) {
+      if (R->Kind != AvailabilityKind::Def &&
+          L->hasMetadata(LLVMContext::MD_invariant_group)) {
+        if (Instruction *G = findInvariantGroupValue(L, *DT))
+          R = ReachingMemoryValue::getDef(getLoadStorePointerOperand(G), G);
+      }
+      Values.emplace_back(std::move(*R));
+      markVisited(Item);
+      continue;
+    }
+
+    // If the clobbering access is in another block, look in the predecessors,
+    // keeping the same clobbering access. This also handles the case when the
+    // clobbering access is liveOnEntry and we aren't at the entry block.
+    if (Item.ClobberMA->getBlock() != Item.BB) {
+      markVisited(Item);
+      SmallVector<WorkItem> TmpWorklist;
+      for (BasicBlock *BB : predecessors(Item.BB)) {
+        PHITransAddr Addr = Item.Addr;
+        auto Status = shouldTraversePredecessor(BB, Item.BB, Addr);
+        if (Status == FailBlock) {
+          TmpWorklist.clear();
+          Values.push_back(ReachingMemoryValue::getUnknown(Item.BB));
+          break;
+        }
+        if (Status == FailPred) {
+          markVisited(
+              {BB, nullptr, Addr, BB == L->getParent() ? L : nullptr, nullptr});
+          Values.push_back(ReachingMemoryValue::getUnknown(BB));
+          continue;
+        }
+        if (Status == Skip)
+          continue;
+        TmpWorklist.emplace_back(BB, Item.ClobberMA, Addr,
+                                 BB == L->getParent() ? L : nullptr, nullptr);
+      }
+      Worklist.append(std::make_move_iterator(TmpWorklist.begin()),
+                      std::make_move_iterator(TmpWorklist.end()));
+      continue;
+    }
+
+    // If the clobbering access is a MemoryPhi, look in the predecessors,
+    // using the corresponding incoming value for this MemoryPhi as the
+    // clobbering access. FIXME(chill): wrong comment.
+    if (auto *MPhi = dyn_cast<MemoryPhi>(Item.ClobberMA)) {
+      markVisited(Item);
+      SmallVector<WorkItem> TmpWorklist;
+      for (unsigned I = 0, N = MPhi->getNumIncomingValues(); I < N; ++I) {
+        BasicBlock *BB = MPhi->getIncomingBlock(I);
+        PHITransAddr Addr = Item.Addr;
+        auto Status = shouldTraversePredecessor(BB, Item.BB, Addr);
+        if (Status == FailBlock) {
+          TmpWorklist.clear();
+          Values.push_back(ReachingMemoryValue::getUnknown(Item.BB));
+          break;
+        }
+        if (Status == FailPred) {
+          markVisited(
+              {BB, nullptr, Addr, BB == L->getParent() ? L : nullptr, nullptr});
+          Values.push_back(ReachingMemoryValue::getUnknown(BB));
+          continue;
+        }
+        if (Status == Skip)
+          continue;
+        TmpWorklist.emplace_back(BB, MPhi->getIncomingValue(I), Addr,
+                                 BB == L->getParent() ? L : nullptr, nullptr);
+        // TmpWorklist.emplace_back(
+        //     BB,
+        //     Walker.getClobberingMemoryAccess(MPhi->getIncomingValue(I),
+        //                                      Loc.getWithNewPtr(Addr.getAddr())),
+        //     Addr, BB == L->getParent() ? L : nullptr, nullptr);
+        continue;
+      }
+      Worklist.append(std::make_move_iterator(TmpWorklist.begin()),
+                      std::make_move_iterator(TmpWorklist.end()));
+      continue;
+    }
+
+    if (!MSSA.isLiveOnEntryDef(Item.ClobberMA)) {
+      // The clobbering access is a normal instruction that we can
+      // nevertheless skip over (e.g. a release fence).
+      auto *Def = cast<MemoryDef>(Item.ClobberMA);
+      Worklist.emplace_back(Item);
+      Worklist.back().ClobberMA = Def->getDefiningAccess();
+      // Worklist.back().ClobberMA = Walker.getClobberingMemoryAccess(
+      //     Def->getDefiningAccess(), Loc.getWithNewPtr(Item.Addr.getAddr()));
+      continue;
+    }
+
+    // If we have liveOnEntry and we are at the entry block, then this block
+    // does not provide any useful value for the load.
+    markVisited(Item);
+    Values.emplace_back(ReachingMemoryValue::getUnknown(Item.BB, Loc.Ptr));
+  }
+}
+
 /// Attempt to eliminate a load, first by eliminating it
 /// locally, and then attempting non-local elimination if that fails.
 bool GVNPass::processLoad(LoadInst *L) {
@@ -1897,25 +2281,26 @@
     return true;
   }

-  // ... to a pointer that has been loaded from before...
-  MemDepResult Dep = MD->getDependency(L);
-
-  // If it is defined in another block, try harder.
-  if (Dep.isNonLocal())
-    return processNonLocalLoad(L);
+  SmallVector<ReachingMemoryValue> ReachingVals;
+  findReachingValuesForLoad(L, *MSSAU->getMemorySSA(), *AA, ReachingVals);
+  assert(ReachingVals.size() && "Expected at least an unknown value");
+  if (ReachingVals.size() > 1 || ReachingVals[0].Block != L->getParent())
+    return processNonLocalLoad(L, ReachingVals);

   // Only handle the local case below
-  if (!Dep.isDef() && !Dep.isClobber()) {
+  if (ReachingVals[0].Kind == AvailabilityKind::Other) {
     // This might be a NonFuncLocal or an Unknown
     LLVM_DEBUG(
         // fast print dep, using operator<< on instruction is too slow.
         dbgs() << "GVN: load "; L->printAsOperand(dbgs());
-        dbgs() << " has unknown dependence\n";);
+        dbgs() << " does not load a known value\n";);
     return false;
   }

   AvailableValue AV;
-  if (AnalyzeLoadAvailability(L, Dep, L->getPointerOperand(), AV)) {
+  if (AnalyzeLoadAvailability(L, ReachingVals[0].Kind, ReachingVals[0].Inst,
+                              ReachingVals[0].Offset, L->getPointerOperand(),
+                              AV)) {
     Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this);
     // Replace the load!
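Another stand-alone sketch, not part of the patch: it models only the "keep the latest candidate" rule used by updateChoice in findReachingValueForLoadInBlock. MemorySSA::locallyDominates(A, B) is approximated by comparing positions within a single block (A dominates B iff A appears before B), so the current choice is replaced whenever it precedes the new candidate. The names Candidate and chooseLatest are invented for the sketch.

#include <cassert>
#include <vector>

// Toy stand-in for one reusable load/store hanging off the load's clobbering
// access; ExactOverlap/Offset carry the Def-vs-Clobber information that the
// real code stores in the chosen ReachingMemoryValue.
struct Candidate {
  unsigned PosInBlock; // stand-in for MemorySSA local dominance order
  bool ExactOverlap;   // must-alias -> Def; otherwise a usable partial alias
  int Offset;          // byte offset for partial overlaps, -1 otherwise
};

// Returns the index of the candidate that occurs last in the block, or -1 if
// there is no candidate at all.
static int chooseLatest(const std::vector<Candidate> &Cands) {
  int Choice = -1;
  for (int I = 0, E = (int)Cands.size(); I != E; ++I)
    if (Choice == -1 || Cands[Choice].PosInBlock < Cands[I].PosInBlock)
      Choice = I; // the previous choice precedes (locally dominates) this one
  return Choice;
}

int main() {
  std::vector<Candidate> C = {{2, true, -1}, {7, false, 4}, {5, true, -1}};
  int Best = chooseLatest(C);
  assert(Best == 1); // the access at position 7 is the latest one
  assert(!C[Best].ExactOverlap && C[Best].Offset == 4); // a usable partial overlap
  return 0;
}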
@@ -2428,6 +2813,7 @@
   DT = &RunDT;
   VN.setDomTree(DT);
   TLI = &RunTLI;
+  AA = &RunAA;
   VN.setAliasAnalysis(&RunAA);
   MD = RunMD;
   ImplicitControlFlowTracking ImplicitCFT;
Index: llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
===================================================================
--- llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -25,19 +25,22 @@
   ret void
 }

-; GVN should ignore the store to p1 to see that the first load from p is
+; WAS: GVN should ignore the store to p1 to see that the first load from p is
 ; fully redundant. However, the second load uses a different type. Theoretically
 ; the other type could be unified with the first type, however for now, GVN
 ; should just be conservative.
+; However, with the MemorySSA changes this no longer happens and GVN optimises
+; it just like in the next function.
+
 ; CHECK: @watch_out_for_type_change
 ; CHECK: if.then:
 ; CHECK:   %t = load i32, i32* %p
 ; CHECK:   store i32 %t, i32* %q
 ; CHECK:   ret void
 ; CHECK: if.else:
-; CHECK:   %u = load i32, i32* %p
-; CHECK:   store i32 %u, i32* %q
+; CHECK:   store i32 0, i32* %q
+; CHECK:   ret void

 define void @watch_out_for_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
 entry:
Index: llvm/test/Transforms/GVN/PRE/rle.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/rle.ll
+++ llvm/test/Transforms/GVN/PRE/rle.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,LE
 ; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basic-aa -gvn -S -dce | FileCheck %s --check-prefixes=CHECK,BE
@@ -1014,11 +1013,11 @@
 ; CHECK-NEXT:    [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
 ; CHECK-NEXT:    [[X1:%.*]] = load i32, i32* [[XX]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; LE-NEXT:    [[TMP0:%.*]] = lshr i32 [[X1]], 16
-; BE-NEXT:    [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; LE-NEXT:    [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; BE-NEXT:    [[TMP0:%.*]] = lshr i32 [[X1]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; LE-NEXT:    [[TMP2:%.*]] = lshr i32 [[X1]], 8
-; BE-NEXT:    [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; LE-NEXT:    [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; BE-NEXT:    [[TMP2:%.*]] = lshr i32 [[X1]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK:       if:
@@ -1026,7 +1025,7 @@
 ; CHECK:       else:
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
-; CHECK-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
+; CHECK-NEXT:    [[TTMP5:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ [[TMP3]], [[ELSE]] ]
 ; CHECK-NEXT:    [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
 ; CHECK-NEXT:    ret i32 [[CONV6]]
 ; CHECK:       if.end:
Index: llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1850,7 +1850,7 @@
 ; O3DEFAULT-NEXT:    [[TMP47:%.*]] = add nsw <4 x i32> [[TMP46]], [[SHUFFLE]]
 ; O3DEFAULT-NEXT:    [[TMP48:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP47]], <4 x i32>* [[TMP48]], align 4
-; O3DEFAULT-NEXT:    [[TMP49:%.*]] = load i32, i32* [[A]], align 4
+; O3DEFAULT-NEXT:    [[TMP49:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
 ; O3DEFAULT-NEXT:    ret i32 [[TMP49]]
 ;
 ; Os-LABEL: @disabled(
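A final stand-alone sketch, not part of the patch: it illustrates why the Clobber case carries a byte offset, which is the behaviour exercised by the updated rle.ll and metadata-enable.ll checks above. When a wider access covers every bit of a narrower load at a known offset, the narrow value can be recovered by shifting and truncating; GVN performs the real transformation via analyzeLoadFromClobbering* and MaterializeAdjustedValue. The arithmetic below assumes a little-endian layout purely for illustration.

#include <cassert>
#include <cstdint>

// Recover the byte at ByteOffset from a 32-bit value that a wider access
// produced, mirroring the shift-and-truncate GVN emits for the narrow load.
static uint8_t extractByte(uint32_t WideValue, unsigned ByteOffset) {
  assert(ByteOffset < sizeof(uint32_t) && "offset must stay inside the wide value");
  return static_cast<uint8_t>(WideValue >> (8 * ByteOffset));
}

int main() {
  const uint32_t Wide = 0x11223344; // value written once by the wider access
  assert(extractByte(Wide, 0) == 0x44);
  assert(extractByte(Wide, 2) == 0x22);
  return 0;
}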