Index: llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -159,7 +159,6 @@ bool LoopsInterleaved; bool RerollLoops; bool NewGVN; - bool DisableGVNLoadPRE; bool ForgetAllSCEVInLoopUnroll; bool VerifyInput; bool VerifyOutput; Index: llvm/include/llvm/Transforms/Scalar/GVN.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/GVN.h +++ llvm/include/llvm/Transforms/Scalar/GVN.h @@ -19,6 +19,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/PHITransAddr.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" @@ -35,6 +36,7 @@ class AssumeInst; class AssumptionCache; class BasicBlock; +class BatchAAResults; class BranchInst; class CallInst; class ExtractValueInst; @@ -44,8 +46,8 @@ class ImplicitControlFlowTracking; class LoadInst; class LoopInfo; -class MemDepResult; -class MemoryDependenceResults; +class MemoryAccess; +class MemoryLocation; class MemorySSA; class MemorySSAUpdater; class NonLocalDepResult; @@ -75,7 +77,7 @@ Optional AllowLoadPRE = None; Optional AllowLoadInLoopPRE = None; Optional AllowLoadPRESplitBackedge = None; - Optional AllowMemDep = None; + Optional AllowMemorySSA = None; GVNOptions() = default; @@ -102,9 +104,9 @@ return *this; } - /// Enables or disables use of MemDepAnalysis. - GVNOptions &setMemDep(bool MemDep) { - AllowMemDep = MemDep; + /// Enables or disables use of MemorySSA. + GVNOptions &setMemorySSA(bool MemSSA) { + AllowMemorySSA = MemSSA; return *this; } }; @@ -136,13 +138,12 @@ DominatorTree &getDominatorTree() const { return *DT; } AAResults *getAliasAnalysis() const { return VN.getAliasAnalysis(); } - MemoryDependenceResults &getMemDep() const { return *MD; } bool isPREEnabled() const; bool isLoadPREEnabled() const; bool isLoadInLoopPREEnabled() const; bool isLoadPRESplitBackedgeEnabled() const; - bool isMemDepEnabled() const; + bool isMemorySSAEnabled() const; /// This class holds the mapping between values and value numbers. It is used /// as an efficient mechanism to determine the expression-wise equivalence of @@ -163,13 +164,17 @@ // Value number to PHINode mapping. Used for phi-translate in scalarpre. DenseMap NumberingPhi; + // Value number to BasicBlock mapping. Used for phi-translate across + // MemoryPhis. + DenseMap NumberingBB; + // Cache for phi-translate in scalarpre. 
   using PhiTranslateMap =
       DenseMap<std::pair<uint32_t, const BasicBlock *>, uint32_t>;
   PhiTranslateMap PhiTranslateTable;
 
   AAResults *AA = nullptr;
-  MemoryDependenceResults *MD = nullptr;
+  MemorySSA *MSSA = nullptr;
   DominatorTree *DT = nullptr;
   uint32_t nextValueNumber = 1;
 
@@ -179,7 +184,10 @@
                             Value *LHS, Value *RHS);
   Expression createExtractvalueExpr(ExtractValueInst *EI);
   Expression createGEPExpr(GetElementPtrInst *GEP);
+  Expression createCallExpr(CallInst *C);
+  void addMemoryStateArg(Instruction *I, Expression &E);
   uint32_t lookupOrAddCall(CallInst *C);
+  uint32_t lookupOrAddLoadStore(Instruction *I);
   uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
                             uint32_t Num, GVNPass &Gvn);
   bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred,
@@ -194,6 +202,7 @@
   ~ValueTable();
   ValueTable &operator=(const ValueTable &Arg);
 
+  uint32_t lookupOrAdd(MemoryAccess *);
   uint32_t lookupOrAdd(Value *V);
   uint32_t lookup(Value *V, bool Verify = true) const;
   uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred,
@@ -207,7 +216,7 @@
   void erase(Value *v);
   void setAliasAnalysis(AAResults *A) { AA = A; }
   AAResults *getAliasAnalysis() const { return AA; }
-  void setMemDep(MemoryDependenceResults *M) { MD = M; }
+  void setMemorySSA(MemorySSA *M) { MSSA = M; }
   void setDomTree(DominatorTree *D) { DT = D; }
   uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
   void verifyRemoved(const Value *) const;
@@ -217,7 +226,6 @@
   friend class gvn::GVNLegacyPass;
   friend struct DenseMapInfo<Expression>;
 
-  MemoryDependenceResults *MD = nullptr;
   DominatorTree *DT = nullptr;
   const TargetLibraryInfo *TLI = nullptr;
   AssumptionCache *AC = nullptr;
@@ -225,6 +233,7 @@
   OptimizationRemarkEmitter *ORE = nullptr;
   ImplicitControlFlowTracking *ICF = nullptr;
   LoopInfo *LI = nullptr;
+  AAResults *AA = nullptr;
   MemorySSAUpdater *MSSAU = nullptr;
 
   ValueTable VN;
@@ -259,8 +268,7 @@
   using UnavailBlkVect = SmallVector<BasicBlock *, 64>;
 
   bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
-               const TargetLibraryInfo &RunTLI, AAResults &RunAA,
-               MemoryDependenceResults *RunMD, LoopInfo *LI,
+               const TargetLibraryInfo &RunTLI, AAResults &RunAA, LoopInfo *LI,
                OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr);
 
   /// Push a new Value to the LeaderTable onto the list for its value number.
@@ -311,21 +319,89 @@
   // List of critical edges to be split between iterations.
   SmallVector<std::pair<Instruction *, unsigned>, 4> toSplit;
 
+  enum class DepKind {
+    Other = 0, // Unknown value.
+    Def,       // Exactly overlapping locations.
+    Clobber,   // Reaching value is a superset of the needed bits.
+  };
+
+  // Describes a memory location value for which there exists a path to a
+  // point in the program along which that memory location is not modified.
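+  // Kind describes how the reaching value relates to the queried location:
+  // Def covers exactly the requested bits, Clobber writes a superset of them
+  // (Offset, when non-negative, is the position of the requested location
+  // within the clobbering access), and Other means the value is unknown.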
+ struct ReachingMemVal { + DepKind Kind; + BasicBlock *Block; + const Value *Addr; + Instruction *Inst; + int32_t Offset; + + static ReachingMemVal getUnknown(BasicBlock *BB, const Value *Addr, + Instruction *Inst = nullptr) { + return {DepKind::Other, BB, Addr, Inst, -1}; + } + + static ReachingMemVal getDef(const Value *Addr, Instruction *Inst) { + return {DepKind::Def, Inst->getParent(), Addr, Inst, -1}; + } + + static ReachingMemVal getClobber(const Value *Addr, Instruction *Inst, + int32_t Offset = -1) { + return {DepKind::Clobber, Inst->getParent(), Addr, Inst, Offset}; + } + }; + + struct DependencyBlockInfo { + DependencyBlockInfo() = delete; + DependencyBlockInfo(const PHITransAddr &Addr, MemoryAccess *ClobberMA) + : Addr{Addr}, InitialClobberMA{ClobberMA}, ClobberMA{ClobberMA}, + ForceUnknown{false}, Visited{false} {} + PHITransAddr Addr; + MemoryAccess *InitialClobberMA; + MemoryAccess *ClobberMA; + Optional MemVal; + bool ForceUnknown : 1; + bool Visited : 1; + }; + + using DependencyBlockSet = DenseMap; + + void collectClobberList(MemorySSA &MSSA, BasicBlock *BB, + const DependencyBlockInfo &StartInfo, + const DependencyBlockSet &Blocks, + SmallVectorImpl &Clobbers); + + Optional + scanUsers(MemorySSA &MSSA, BatchAAResults &AA, BasicBlock &BB, + const SmallVectorImpl &Clobbers, + const MemoryLocation &Loc, bool IsInvariantLoad, + LoadInst *L = nullptr); + + Optional accessModifiesLocation( + const MemorySSA &MSSA, BatchAAResults &AA, const MemoryAccess &ClobberMA, + const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock &BB); + + bool collectPredecessors(BasicBlock *BB, const PHITransAddr &Addr, + MemoryAccess *ClobberMA, DependencyBlockSet &Blocks, + SmallVectorImpl &Worklist); + + bool findReachingValuesForLoad(LoadInst *Inst, MemorySSA &MSSA, AAResults &AA, + SmallVectorImpl &Values); + // Helper functions of redundant load elimination bool processLoad(LoadInst *L); - bool processNonLocalLoad(LoadInst *L); + bool processNonLocalLoad(LoadInst *L, SmallVectorImpl &Deps); bool processAssumeIntrinsic(AssumeInst *II); /// Given a local dependency (Def or Clobber) determine if a value is /// available for the load. Returns true if an value is known to be /// available and populates Res. Returns false otherwise. - bool AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, + bool AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep, Value *Address, gvn::AvailableValue &Res); /// Given a list of non-local dependencies, determine if a value is /// available for the load in each specified block. If it is, add it to /// ValuesPerBlock. If not, add it to UnavailableBlocks. - void AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, + void AnalyzeLoadAvailability(LoadInst *Load, + SmallVectorImpl &Deps, AvailValInBlkVect &ValuesPerBlock, UnavailBlkVect &UnavailableBlocks); @@ -367,9 +443,8 @@ void assignBlockRPONumber(Function &F); }; -/// Create a legacy GVN pass. This also allows parameterizing whether or not -/// MemDep is enabled. -FunctionPass *createGVNPass(bool NoMemDepAnalysis = false); +/// Create a legacy GVN pass. +FunctionPass *createGVNPass(); /// A simple and fast domtree-based GVN pass to hoist common expressions /// from sibling branches. 
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -819,8 +819,8 @@
       Result.setLoadPRE(Enable);
     } else if (ParamName == "split-backedge-load-pre") {
       Result.setLoadPRESplitBackedge(Enable);
-    } else if (ParamName == "memdep") {
-      Result.setMemDep(Enable);
+    } else if (ParamName == "memoryssa") {
+      Result.setMemorySSA(Enable);
     } else {
       return make_error<StringError>(
           formatv("invalid GVN pass parameter '{0}' ", ParamName).str(),
Index: llvm/lib/Passes/PassRegistry.def
===================================================================
--- llvm/lib/Passes/PassRegistry.def
+++ llvm/lib/Passes/PassRegistry.def
@@ -465,7 +465,7 @@
                            "no-pre;pre;"
                            "no-load-pre;load-pre;"
                            "no-split-backedge-load-pre;split-backedge-load-pre;"
-                           "no-memdep;memdep")
+                           "no-memoryssa;memoryssa")
 FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>", "StackLifetimePrinterPass",
                           [](StackLifetime::LivenessType Type) {
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -179,7 +179,6 @@
   NewGVN = RunNewGVN;
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
-  DisableGVNLoadPRE = false;
   ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
   VerifyInput = false;
   VerifyOutput = false;
@@ -409,7 +408,7 @@
   if (OptLevel > 1) {
     MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
     MPM.add(NewGVN ? createNewGVNPass()
-                   : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+                   : createGVNPass()); // Remove redundancies
   }
   MPM.add(createSCCPPass()); // Constant prop with SCCP
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -35,7 +35,6 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -108,7 +107,12 @@
 static cl::opt<bool> GVNEnableSplitBackedgeInLoadPRE(
     "enable-split-backedge-in-load-pre", cl::init(false));
-static cl::opt<bool> GVNEnableMemDep("enable-gvn-memdep", cl::init(true));
+static cl::opt<bool> GVNEnableMemorySSA("enable-gvn-memoryssa",
+                                        cl::init(true));
+
+static cl::opt<unsigned> BlockScanLimit(
+    "gvn-block-scan-limit", cl::Hidden, cl::init(100),
+    cl::desc("The number of memory accesses to scan in a block in reaching "
+             "memory values analysis (default = 100)"));
 
 static cl::opt<unsigned> MaxNumDeps(
     "gvn-max-num-deps", cl::Hidden, cl::init(100),
@@ -309,17 +313,9 @@
   Expression e;
   e.type = I->getType();
   e.opcode = I->getOpcode();
-  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(I)) {
-    // gc.relocate is 'special' call: its second and third operands are
-    // not real values, but indices into statepoint's argument list.
-    // Use the refered to values for purposes of identity.
-    e.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
-    e.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
-    e.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
-  } else {
-    for (Use &Op : I->operands())
-      e.varargs.push_back(lookupOrAdd(Op));
-  }
+  for (Use &Op : I->operands())
+    e.varargs.push_back(lookupOrAdd(Op));
+
   if (I->isCommutative()) {
     // Ensure that commutative instructions that only differ by a permutation
     // of their operands get the same value number by sorting the operand value
@@ -350,6 +346,34 @@
   return e;
 }
 
+GVNPass::Expression GVNPass::ValueTable::createCallExpr(CallInst *C) {
+  Expression E;
+  E.type = C->getType();
+  E.opcode = C->getOpcode();
+  if (const GCRelocateInst *GCR = dyn_cast<GCRelocateInst>(C)) {
+    // gc.relocate is a 'special' call: its second and third operands are
+    // not real values, but indices into the statepoint's argument list.
+    // Use the referred-to values for purposes of identity.
+    E.varargs.push_back(lookupOrAdd(GCR->getOperand(0)));
+    E.varargs.push_back(lookupOrAdd(GCR->getBasePtr()));
+    E.varargs.push_back(lookupOrAdd(GCR->getDerivedPtr()));
+    return E;
+  }
+
+  for (Use &Op : C->operands())
+    E.varargs.push_back(lookupOrAdd(Op));
+
+  if (C->isCommutative()) {
+    // There are commutative intrinsic calls.
+    assert(C->getNumOperands() >= 2 && "Unsupported commutative instruction!");
+    if (E.varargs[0] > E.varargs[1])
+      std::swap(E.varargs[0], E.varargs[1]);
+    E.commutative = true;
+  }
+
+  return E;
+}
+
 GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
     unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
   assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
@@ -449,107 +473,55 @@
   NumberingPhi[num] = PN;
 }
 
+// Include the incoming memory state in the hash for the instruction. If the
+// incoming memory state is:
+// * LiveOnEntry, add the value number of the entry block,
+// * a MemoryPhi, add the value number of the basic block corresponding to
+//   that MemoryPhi,
+// * a MemoryDef, add the value number of the memory-setting instruction.
+void GVNPass::ValueTable::addMemoryStateArg(Instruction *I, Expression &E) {
+  assert(MSSA && "Function should not be called without MemorySSA");
+  assert(MSSA->getMemoryAccess(I) && "Instruction does not access memory");
+  MemoryAccess *MA = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(I);
+  E.varargs.push_back(lookupOrAdd(MA));
+}
+
 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
   if (AA->doesNotAccessMemory(C)) {
-    Expression exp = createExpr(C);
-    uint32_t e = assignExpNewValueNum(exp).first;
-    valueNumbering[C] = e;
-    return e;
-  } else if (MD && AA->onlyReadsMemory(C)) {
-    Expression exp = createExpr(C);
-    auto ValNum = assignExpNewValueNum(exp);
-    if (ValNum.second) {
-      valueNumbering[C] = ValNum.first;
-      return ValNum.first;
-    }
-
-    MemDepResult local_dep = MD->getDependency(C);
-
-    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
-      valueNumbering[C] = nextValueNumber;
-      return nextValueNumber++;
-    }
-
-    if (local_dep.isDef()) {
-      // For masked load/store intrinsics, the local_dep may actully be
-      // a normal load or store instruction.
- CallInst *local_cdep = dyn_cast(local_dep.getInst()); - - if (!local_cdep || local_cdep->arg_size() != C->arg_size()) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - - for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { - uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); - uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i)); - if (c_vn != cd_vn) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - } - - uint32_t v = lookupOrAdd(local_cdep); - valueNumbering[C] = v; - return v; - } - - // Non-local case. - const MemoryDependenceResults::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(C); - // FIXME: Move the checking logic to MemDep! - CallInst* cdep = nullptr; - - // Check to see if we have a single dominating call instruction that is - // identical to C. - for (unsigned i = 0, e = deps.size(); i != e; ++i) { - const NonLocalDepEntry *I = &deps[i]; - if (I->getResult().isNonLocal()) - continue; - - // We don't handle non-definitions. If we already have a call, reject - // instruction dependencies. - if (!I->getResult().isDef() || cdep != nullptr) { - cdep = nullptr; - break; - } - - CallInst *NonLocalDepCall = dyn_cast(I->getResult().getInst()); - // FIXME: All duplicated with non-local case. - if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){ - cdep = NonLocalDepCall; - continue; - } - - cdep = nullptr; - break; - } - - if (!cdep) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } + Expression Exp = createCallExpr(C); + uint32_t E = assignExpNewValueNum(Exp).first; + valueNumbering[C] = E; + return E; + } - if (cdep->arg_size() != C->arg_size()) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - for (unsigned i = 0, e = C->arg_size(); i < e; ++i) { - uint32_t c_vn = lookupOrAdd(C->getArgOperand(i)); - uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i)); - if (c_vn != cd_vn) { - valueNumbering[C] = nextValueNumber; - return nextValueNumber++; - } - } + if (MSSA && AA->onlyReadsMemory(C)) { + Expression E = createCallExpr(C); + addMemoryStateArg(C, E); + uint32_t VN = assignExpNewValueNum(E).first; + valueNumbering[C] = VN; + return VN; + } + valueNumbering[C] = nextValueNumber; + return nextValueNumber++; +} - uint32_t v = lookupOrAdd(cdep); - valueNumbering[C] = v; - return v; - } else { - valueNumbering[C] = nextValueNumber; +uint32_t GVNPass::ValueTable::lookupOrAddLoadStore(Instruction *I) { + if (MSSA == nullptr) { + valueNumbering[I] = nextValueNumber; return nextValueNumber++; } + + Expression E; + E.type = I->getType(); + E.opcode = I->getOpcode(); + for (Use &Op : I->operands()) + E.varargs.push_back(lookupOrAdd(Op)); + addMemoryStateArg(I, E); + + uint32_t N = assignExpNewValueNum(E).first; + valueNumbering[I] = N; + return N; } /// Returns true if a value number exists for the specified value. @@ -557,6 +529,12 @@ return valueNumbering.count(V) != 0; } +uint32_t GVNPass::ValueTable::lookupOrAdd(MemoryAccess *MA) { + return MSSA->isLiveOnEntryDef(MA) || isa(MA) + ? lookupOrAdd(MA->getBlock()) + : lookupOrAdd(cast(MA)->getMemoryInst()); +} + /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. 
 uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
@@ -566,6 +544,10 @@
   if (!isa<Instruction>(V)) {
     valueNumbering[V] = nextValueNumber;
+    if (MSSA) {
+      if (auto *BB = dyn_cast<BasicBlock>(V))
+        NumberingBB[nextValueNumber] = BB;
+    }
     return nextValueNumber++;
   }
 
@@ -626,6 +608,9 @@
     valueNumbering[V] = nextValueNumber;
     NumberingPhi[nextValueNumber] = cast<PHINode>(V);
     return nextValueNumber++;
+  case Instruction::Load:
+  case Instruction::Store:
+    return lookupOrAddLoadStore(I);
   default:
     valueNumbering[V] = nextValueNumber;
     return nextValueNumber++;
@@ -663,6 +648,7 @@
   valueNumbering.clear();
   expressionNumbering.clear();
   NumberingPhi.clear();
+  NumberingBB.clear();
   PhiTranslateTable.clear();
   nextValueNumber = 1;
   Expressions.clear();
@@ -677,6 +663,8 @@
   // If V is PHINode, V <--> value number is an one-to-one mapping.
   if (isa<PHINode>(V))
     NumberingPhi.erase(Num);
+  else if (isa<BasicBlock>(V))
+    NumberingBB.erase(Num);
 }
 
 /// verifyRemoved - Verify that the value is removed from all internal data
@@ -709,8 +697,8 @@
       GVNEnableSplitBackedgeInLoadPRE);
 }
 
-bool GVNPass::isMemDepEnabled() const {
-  return Options.AllowMemDep.value_or(GVNEnableMemDep);
+bool GVNPass::isMemorySSAEnabled() const {
+  return Options.AllowMemorySSA.value_or(GVNEnableMemorySSA);
 }
 
 PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
@@ -722,13 +710,12 @@
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
-  auto *MemDep =
-      isMemDepEnabled() ? &AM.getResult<MemoryDependenceAnalysis>(F) : nullptr;
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
-  auto *MSSA = AM.getCachedResult<MemorySSAAnalysis>(F);
+  auto *MSSA =
+      isMemorySSAEnabled() ? &AM.getResult<MemorySSAAnalysis>(F) : nullptr;
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE,
-                         MSSA ? &MSSA->getMSSA() : nullptr);
+  bool Changed =
+      runImpl(F, AC, DT, TLI, AA, LI, &ORE, MSSA ? &MSSA->getMSSA() : nullptr);
   if (!Changed)
     return PreservedAnalyses::all();
   PreservedAnalyses PA;
@@ -754,8 +741,8 @@
   if (Options.AllowLoadPRESplitBackedge != None)
     OS << (Options.AllowLoadPRESplitBackedge.value() ? "" : "no-")
        << "split-backedge-load-pre;";
-  if (Options.AllowMemDep != None)
-    OS << (Options.AllowMemDep.value() ? "" : "no-") << "memdep";
+  if (Options.AllowMemorySSA != None)
+    OS << (Options.AllowMemorySSA.value() ? "" : "no-") << "memoryssa";
   OS << ">";
 }
 
@@ -989,7 +976,6 @@
     // tracks.  It is potentially possible to remove the load from the table,
     // but then there all of the operations based on it would need to be
    // rehashed.  Just leave the dead load around.
-    gvn.getMemDep().removeInstruction(CoercedLoad);
     LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
                       << "  " << *getCoercedLoadValue() << '\n'
                       << *Res << '\n'
@@ -1039,7 +1025,7 @@
 
 /// Try to locate the three instruction involved in a missed
 /// load-elimination case that is due to an intervening store.
-static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo, +static void reportMayClobberedLoad(LoadInst *Load, Instruction *DepInst, DominatorTree *DT, OptimizationRemarkEmitter *ORE) { using namespace ore; @@ -1094,7 +1080,7 @@ if (OtherAccess) R << " in favor of " << NV("OtherAccess", OtherAccess); - R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst()); + R << " because it is clobbered by " << NV("ClobberedBy", DepInst); ORE->emit(R); } @@ -1134,9 +1120,9 @@ return AvailableValue::getSelect(Sel); } -bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo, +bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, const ReachingMemVal &Dep, Value *Address, AvailableValue &Res) { - if (!DepInfo.isDef() && !DepInfo.isClobber()) { + if (Dep.Kind == DepKind::Other) { assert(isa(Address)); if (auto R = tryToConvertLoadOfPtrSelect( Load->getParent(), Load->getIterator(), Address, Load->getType(), @@ -1147,14 +1133,14 @@ return false; } - assert((DepInfo.isDef() || DepInfo.isClobber()) && + assert((Dep.Kind == DepKind::Def || Dep.Kind == DepKind::Clobber) && "expected a local dependence"); assert(Load->isUnordered() && "rules below are incorrect for ordered access"); const DataLayout &DL = Load->getModule()->getDataLayout(); - Instruction *DepInst = DepInfo.getInst(); - if (DepInfo.isClobber()) { + Instruction *DepInst = Dep.Inst; + if (Dep.Kind == DepKind::Clobber) { // If the dependence is to a store that writes to a superset of the bits // read by the load, we can extract the bits we need for the load from the // stored value. @@ -1181,15 +1167,10 @@ if (DepLoad != Load && Address && Load->isAtomic() <= DepLoad->isAtomic()) { Type *LoadType = Load->getType(); - int Offset = -1; - - // If MD reported clobber, check it was nested. - if (DepInfo.isClobber() && - canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL)) { - const auto ClobberOff = MD->getClobberOffset(DepLoad); - // GVN has no deal with a negative offset. - Offset = (ClobberOff == None || *ClobberOff < 0) ? -1 : *ClobberOff; - } + int Offset = Dep.Offset; + if (!canCoerceMustAliasedValueToLoad(DepLoad, LoadType, DL) || + Offset < 0) + Offset = -1; if (Offset == -1) Offset = analyzeLoadFromClobberingLoad(LoadType, Address, DepLoad, DL); @@ -1219,11 +1200,11 @@ dbgs() << "GVN: load "; Load->printAsOperand(dbgs()); dbgs() << " is clobbered by " << *DepInst << '\n';); if (ORE->allowExtraAnalysis(DEBUG_TYPE)) - reportMayClobberedLoad(Load, DepInfo, DT, ORE); + reportMayClobberedLoad(Load, DepInst, DT, ORE); return false; } - assert(DepInfo.isDef() && "follows from above"); + assert(Dep.Kind == DepKind::Def && "follows from above"); // Loading the alloca -> undef. // Loading immediately after lifetime begin -> undef. @@ -1277,7 +1258,8 @@ return false; } -void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps, +void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, + SmallVectorImpl &Deps, AvailValInBlkVect &ValuesPerBlock, UnavailBlkVect &UnavailableBlocks) { // Filter out useless results (non-locals, etc). Keep track of the blocks @@ -1286,22 +1268,20 @@ // that could potentially clobber the load). unsigned NumDeps = Deps.size(); for (unsigned i = 0, e = NumDeps; i != e; ++i) { - BasicBlock *DepBB = Deps[i].getBB(); - MemDepResult DepInfo = Deps[i].getResult(); - + const auto &Dep = Deps[i]; + BasicBlock *DepBB = Dep.Block; if (DeadBlocks.count(DepBB)) { // Dead dependent mem-op disguise as a load evaluating the same value // as the load in question. 
- ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(DepBB)); + ValuesPerBlock.push_back(AvailableValueInBlock::getUndef(Dep.Block)); continue; } // The address being loaded in this non-local block may not be the same as // the pointer operand of the load if PHI translation occurs. Make sure // to consider the right address. - Value *Address = Deps[i].getAddress(); - - if (!DepInfo.isDef() && !DepInfo.isClobber()) { + Value *Address = const_cast(Deps[i].Addr); + if (Dep.Kind == DepKind::Other) { if (auto R = tryToConvertLoadOfPtrSelect( DepBB, DepBB->end(), Address, Load->getType(), getDominatorTree(), getAliasAnalysis())) { @@ -1314,7 +1294,7 @@ } AvailableValue AV; - if (AnalyzeLoadAvailability(Load, DepInfo, Address, AV)) { + if (AnalyzeLoadAvailability(Load, Dep, Address, AV)) { // subtlety: because we know this was a non-local dependency, we know // it's safe to materialize anywhere between the instruction within // DepInfo and the end of it's block. @@ -1383,7 +1363,6 @@ // Add the newly created load. ValuesPerBlock.push_back( AvailableValueInBlock::get(UnavailableBlock, NewLoad)); - MD->invalidateCachedPointerInfo(LoadPtr); LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); } @@ -1394,8 +1373,6 @@ V->takeName(Load); if (Instruction *I = dyn_cast(V)) I->setDebugLoc(Load->getDebugLoc()); - if (V->getType()->isPtrOrPtrVectorTy()) - MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(Load); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load) @@ -1729,29 +1706,11 @@ /// Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVNPass::processNonLocalLoad(LoadInst *Load) { - // non-local speculations are not allowed under asan. - if (Load->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeAddress) || - Load->getParent()->getParent()->hasFnAttribute( - Attribute::SanitizeHWAddress)) - return false; - - // Step 1: Find the non-local dependencies of the load. - LoadDepVect Deps; - MD->getNonLocalPointerDependency(Load, Deps); - - // If we had to process more than one hundred blocks to find the - // dependencies, this load isn't worth worrying about. Optimizing - // it will be too expensive. - unsigned NumDeps = Deps.size(); - if (NumDeps > MaxNumDeps) - return false; - +bool GVNPass::processNonLocalLoad(LoadInst *Load, + SmallVectorImpl &Deps) { // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. - if (NumDeps == 1 && - !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 && Deps[0].Kind == DepKind::Other) { LLVM_DEBUG(dbgs() << "GVN: non-local load "; Load->printAsOperand(dbgs()); dbgs() << " has unknown dependencies\n";); return false; @@ -1796,8 +1755,6 @@ // to propagate Load's DebugLoc because Load may not post-dominate I. if (Load->getDebugLoc() && Load->getParent() == I->getParent()) I->setDebugLoc(Load->getDebugLoc()); - if (V->getType()->isPtrOrPtrVectorTy()) - MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(Load); ++NumGVNLoad; reportLoadElim(Load, V, ORE); @@ -2020,10 +1977,535 @@ I->replaceAllUsesWith(Repl); } +static Instruction *findInvariantGroupValue(LoadInst *L, DominatorTree &DT) { + // We consider bitcasts and zero GEPs to be the same pointer value. Start by + // stripping bitcasts and zero GEPs, then we will recursively look at loads + // and stores through bitcasts and zero GEPs. 
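+  // In effect, the walk below looks for the most-dominating load or store
+  // tagged with !invariant.group on the same (cast-stripped) pointer; if one
+  // dominates L, its value can be reused.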
+  Value *PointerOperand = L->getPointerOperand()->stripPointerCasts();
+
+  // It's not safe to walk the use list of a global value because function
+  // passes aren't allowed to look outside their functions.
+  // FIXME: this could be fixed by filtering instructions from outside of
+  // the current function.
+  if (isa<GlobalValue>(PointerOperand))
+    return nullptr;
+
+  // Queue to process all pointers that are equivalent to the load operand.
+  SmallVector<Value *> PointerUsesQueue;
+  PointerUsesQueue.push_back(PointerOperand);
+
+  Instruction *MostDominatingInstruction = L;
+
+  // FIXME: This loop is O(n^2) because dominates can be O(n) and in the
+  // worst case we will see all the instructions.
+  while (!PointerUsesQueue.empty()) {
+    Value *Ptr = PointerUsesQueue.pop_back_val();
+    assert(Ptr && !isa<GlobalValue>(Ptr) &&
+           "Null or GlobalValue should not be inserted");
+
+    for (User *Us : Ptr->users()) {
+      auto *U = dyn_cast<Instruction>(Us);
+      if (!U || U == L || !DT.dominates(U, MostDominatingInstruction))
+        continue;
+
+      // Add bitcasts and zero GEPs to queue.
+      if (isa<BitCastInst>(U)) {
+        PointerUsesQueue.push_back(U);
+        continue;
+      }
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+        if (GEP->hasAllZeroIndices())
+          PointerUsesQueue.push_back(U);
+        continue;
+      }
+
+      // If we hit a load/store with invariant.group metadata and the same
+      // pointer operand, we can assume that the value pointed to by the
+      // pointer operand didn't change.
+      if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+          getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+        MostDominatingInstruction = U;
+      }
+    }
+  }
+  return MostDominatingInstruction == L ? nullptr : MostDominatingInstruction;
+}
+
+// Return the memory location accessed by the [masked] load/store instruction
+// `I`, if the instruction could potentially provide a useful value for
+// eliminating the load.
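+// Plain and masked loads are always interesting; plain and masked stores are
+// interesting only when the caller asks for values written by stores.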
+static Optional<MemoryLocation>
+getLoadStoreLocationIfInteresting(const Instruction *I,
+                                  bool StoresAreInteresting,
+                                  const TargetLibraryInfo *TLI) {
+  if (auto *L = dyn_cast<LoadInst>(I))
+    return MemoryLocation::get(L);
+
+  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+    if (II->getIntrinsicID() == Intrinsic::masked_load)
+      return MemoryLocation::getForArgument(II, 0, TLI);
+  }
+
+  if (!StoresAreInteresting)
+    return None;
+
+  if (auto *S = dyn_cast<StoreInst>(I))
+    return MemoryLocation::get(S);
+
+  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+    if (II->getIntrinsicID() == Intrinsic::masked_store)
+      return MemoryLocation::getForArgument(II, 1, TLI);
+  }
+
+  return None;
+}
+
+static AtomicOrdering getOrdering(const Instruction *I) {
+  assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+  if (const auto *L = dyn_cast<LoadInst>(I))
+    return L->getOrdering();
+  return cast<StoreInst>(I)->getOrdering();
+}
+
+Optional<GVNPass::ReachingMemVal>
+GVNPass::scanUsers(MemorySSA &MSSA, BatchAAResults &AA, BasicBlock &BB,
+                   const SmallVectorImpl<MemoryAccess *> &Clobbers,
+                   const MemoryLocation &Loc, bool IsInvariantLoad,
+                   LoadInst *L) {
+  auto updateChoice = [&](Optional<ReachingMemVal> &Choice, AliasResult &AR,
+                          Instruction *Candidate) {
+    if (!Choice) {
+      if (AR == AliasResult::PartialAlias)
+        Choice =
+            ReachingMemVal::getClobber(Loc.Ptr, Candidate, AR.getOffset());
+      else
+        Choice = ReachingMemVal::getDef(Loc.Ptr, Candidate);
+      return;
+    }
+
+    if (!MSSA.locallyDominates(MSSA.getMemoryAccess(Choice->Inst),
+                               MSSA.getMemoryAccess(Candidate)))
+      return;
+
+    Choice->Inst = Candidate;
+
+    if (AR == AliasResult::PartialAlias) {
+      Choice->Kind = DepKind::Clobber;
+      Choice->Offset = AR.getOffset();
+    } else {
+      Choice->Kind = DepKind::Def;
+      Choice->Offset = -1;
+    }
+  };
+
+  Optional<ReachingMemVal> Result;
+  for (MemoryAccess *ClobberMA : Clobbers) {
+    for (User *U : ClobberMA->users()) {
+      auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U);
+      if (UseOrDef == nullptr || UseOrDef->getBlock() != &BB)
+        continue;
+
+      Instruction *I = UseOrDef->getMemoryInst();
+      Optional<MemoryLocation> M =
+          getLoadStoreLocationIfInteresting(I, IsInvariantLoad, TLI);
+      if (!M)
+        continue;
+
+      if (I == L ||
+          (L != nullptr &&
+           !MSSA.locallyDominates(UseOrDef, MSSA.getMemoryAccess(L))))
+        continue;
+
+      AliasResult AR = AA.alias(*M, Loc);
+      // If the locations do not certainly alias, we cannot possibly infer
+      // that the load reads the same value.
+      if (AR == AliasResult::NoAlias || AR == AliasResult::MayAlias)
+        continue;
+
+      // Locations partially overlap, but neither is a subset of the other,
+      // or the second location is before the first.
+      if (AR == AliasResult::PartialAlias &&
+          (!AR.hasOffset() || AR.getOffset() < 0))
+        continue;
+
+      // Locations precisely overlap, or the second accesses a subset of the
+      // bits of the first.
+      updateChoice(Result, AR, I);
+    }
+
+    if (Result != None)
+      break;
+  }
+
+  return Result;
+}
+
+// Check if a MemoryAccess (usually a MemoryDef) actually modifies a given
+// location.
+Optional<GVNPass::ReachingMemVal> GVNPass::accessModifiesLocation(
+    const MemorySSA &MSSA, BatchAAResults &AA, const MemoryAccess &ClobberMA,
+    const MemoryLocation &Loc, bool IsInvariantLoad, BasicBlock &BB) {
+  assert(ClobberMA.getBlock() == &BB);
+
+  // If the clobbering memory access is LiveOnEntry, we cannot say anything
+  // about the content of the memory, except when we're accessing a local,
+  // which can later be turned into producing `undef`.
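+  // (A load from a local alloca with no reaching store reads uninitialized
+  // memory, hence the alloca itself can act as the definition.)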
+  if (MSSA.isLiveOnEntryDef(&ClobberMA)) {
+    auto *Alloc = dyn_cast<AllocaInst>(getUnderlyingObject(Loc.Ptr));
+    if (Alloc != nullptr && Alloc->getParent() == &BB)
+      return ReachingMemVal::getDef(Loc.Ptr, const_cast<AllocaInst *>(Alloc));
+    return ReachingMemVal::getUnknown(&BB, Loc.Ptr);
+  }
+
+  // Check if the clobbering access is a load or a store.
+  Instruction *ClobberInst = cast<MemoryDef>(ClobberMA).getMemoryInst();
+  if (Optional<MemoryLocation> M =
+          getLoadStoreLocationIfInteresting(ClobberInst, true, TLI)) {
+    AliasResult AR = AA.alias(*M, Loc);
+    if (AR == AliasResult::MustAlias)
+      return ReachingMemVal::getDef(Loc.Ptr, ClobberInst);
+
+    if (AR == AliasResult::NoAlias) {
+      // The original load is either non-atomic or unordered. We can reorder
+      // these across non-atomic, unordered or monotonic loads, or across any
+      // store.
+      if (!ClobberInst->isAtomic() ||
+          !isStrongerThan(getOrdering(ClobberInst),
+                          AtomicOrdering::Monotonic) ||
+          isa<StoreInst>(ClobberInst))
+        return None;
+      return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst);
+    }
+
+    if (IsInvariantLoad)
+      return None;
+
+    // Skip over volatile loads (the original load is non-volatile,
+    // non-atomic).
+    if (!ClobberInst->isAtomic() && isa<LoadInst>(ClobberInst))
+      return None;
+
+    // We have a store with unknown or non-reusable overlap.
+    if (AR == AliasResult::MayAlias ||
+        (AR == AliasResult::PartialAlias &&
+         (!AR.hasOffset() || AR.getOffset() < 0)))
+      return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst);
+
+    // The only option left is a store of a superset of the required bits.
+    assert(AR == AliasResult::PartialAlias && AR.hasOffset() &&
+           AR.getOffset() > 0 && "Follows from the conditions above");
+    return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst, AR.getOffset());
+  }
+
+  // Loads from "constant" memory can't be clobbered.
+  if (IsInvariantLoad || AA.pointsToConstantMemory(Loc))
+    return None;
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(ClobberInst)) {
+    if (isa<AssumeInst>(II))
+      return None;
+    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+      MemoryLocation M = MemoryLocation::getForArgument(II, 1, TLI);
+      if (AA.isMustAlias(M, Loc))
+        return ReachingMemVal::getDef(Loc.Ptr, ClobberInst);
+      return None;
+    }
+  }
+
+  // If we are at a malloc-like function call, we can turn the load into
+  // `undef` or zero.
+  if (isNoAliasCall(ClobberInst)) {
+    const Value *Obj = getUnderlyingObject(Loc.Ptr);
+    if (Obj == ClobberInst || AA.isMustAlias(ClobberInst, Loc.Ptr))
+      return ReachingMemVal::getDef(Loc.Ptr, ClobberInst);
+  }
+
+  // Can reorder loads across a release fence.
+  if (auto *Fence = dyn_cast<FenceInst>(ClobberInst)) {
+    if (Fence->getOrdering() == AtomicOrdering::Release)
+      return None;
+  }
+
+  // See if the clobber instruction (e.g. a call) may modify the location.
+  ModRefInfo MR = AA.getModRefInfo(ClobberInst, Loc);
+  // If modification is possible, analyse deeper, to exclude accesses to
+  // non-escaping local allocations.
+  if (isModAndRefSet(MR))
+    MR = AA.callCapturesBefore(ClobberInst, Loc, DT);
+  MR = clearMust(MR);
+  if (MR == ModRefInfo::NoModRef || MR == ModRefInfo::Ref)
+    return None;
+
+  // Conservatively assume the clobbering memory access indeed clobbers the
+  // location.
+  return ReachingMemVal::getClobber(Loc.Ptr, ClobberInst);
+}
+
+// Collect the predecessors of a block, doing phi-translation of the memory
+// address and the memory clobber. Return false if the block should be marked
+// as clobbering the memory location in an unknown way.
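+// Each newly discovered predecessor is entered into `Blocks` with the
+// phi-translated address and, when the clobber in the current block is a
+// MemoryPhi, with that phi's incoming memory state for the predecessor.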
+bool GVNPass::collectPredecessors(BasicBlock *BB, const PHITransAddr &Addr,
+                                  MemoryAccess *ClobberMA,
+                                  DependencyBlockSet &Blocks,
+                                  SmallVectorImpl<BasicBlock *> &Worklist) {
+  if (Addr.NeedsPHITranslationFromBlock(BB) &&
+      !Addr.IsPotentiallyPHITranslatable())
+    return false;
+
+  auto *MPhi =
+      ClobberMA->getBlock() == BB ? dyn_cast<MemoryPhi>(ClobberMA) : nullptr;
+
+  SmallVector<std::pair<BasicBlock *, DependencyBlockInfo>, 8> Preds;
+  for (BasicBlock *Pred : predecessors(BB)) {
+    // Skip unreachable predecessors.
+    if (!DT->isReachableFromEntry(Pred))
+      continue;
+    // Skip duplicate predecessors.
+    if (llvm::any_of(Preds, [Pred](const auto &P) { return P.first == Pred; }))
+      continue;
+    PHITransAddr TransAddr = Addr;
+    if (TransAddr.NeedsPHITranslationFromBlock(BB))
+      TransAddr.PHITranslateValue(BB, Pred, DT, false);
+    auto It = Blocks.find(Pred);
+    if (It != Blocks.end()) {
+      // If we reach a visited block with a different address, set the
+      // current block as clobbering the memory location in an unknown way
+      // (by returning false).
+      if (It->second.Addr.getAddr() != TransAddr.getAddr())
+        return false;
+      // Otherwise just stop the traversal.
+      continue;
+    }
+    Preds.emplace_back(
+        Pred, DependencyBlockInfo(TransAddr,
+                                  MPhi == nullptr
+                                      ? ClobberMA
+                                      : MPhi->getIncomingValueForBlock(Pred)));
+  }
+
+  for (auto &P : Preds) {
+    (void)Blocks.try_emplace(P.first, std::move(P.second)).first->second;
+    Worklist.push_back(P.first);
+  }
+
+  return true;
+}
+
+// Collect a list of memory clobbers whose memory uses could alias our memory
+// location.
+void GVNPass::collectClobberList(MemorySSA &MSSA, BasicBlock *BB,
+                                 const DependencyBlockInfo &StartInfo,
+                                 const DependencyBlockSet &Blocks,
+                                 SmallVectorImpl<MemoryAccess *> &Clobbers) {
+  MemoryAccess *MA = StartInfo.InitialClobberMA;
+  MemoryAccess *LastMA = StartInfo.ClobberMA;
+  for (;;) {
+    while (MA != LastMA) {
+      Clobbers.push_back(MA);
+      MA = cast<MemoryDef>(MA)->getDefiningAccess();
+    }
+    Clobbers.push_back(MA);
+
+    if (MSSA.isLiveOnEntryDef(MA))
+      break;
+
+    if (MA->getBlock() == BB && !isa<MemoryPhi>(MA))
+      break;
+
+    if (MA->getBlock() == BB)
+      BB = DT->getNode(BB)->getIDom()->getBlock();
+    else
+      BB = MA->getBlock();
+
+    auto It = Blocks.find(BB);
+    if (It == Blocks.end())
+      break;
+    MA = It->second.InitialClobberMA;
+    LastMA = It->second.ClobberMA;
+    if (MA == Clobbers.back())
+      Clobbers.pop_back();
+  }
+}
+
+// Find the set of all the reaching memory definitions for the location
+// referred to by the pointer operand of the given load instruction.
+// Definitely aliasing memory reads are treated as definitions for the
+// purposes of this function.
+bool GVNPass::findReachingValuesForLoad(
+    LoadInst *L, MemorySSA &MSSA, AAResults &AAR,
+    SmallVectorImpl<ReachingMemVal> &Values) {
+  BatchAAResults AA(AAR);
+  BasicBlock &StartBlock = *L->getParent();
+  bool IsInvariantLoad = L->hasMetadata(LLVMContext::MD_invariant_load);
+  MemoryAccess *ClobberMA = MSSA.getMemoryAccess(L)->getDefiningAccess();
+  auto Loc = MemoryLocation::get(L);
+
+  if (L->hasMetadata(LLVMContext::MD_invariant_group)) {
+    if (Instruction *G = findInvariantGroupValue(L, *DT)) {
+      Values.push_back(
+          ReachingMemVal::getDef(getLoadStorePointerOperand(G), G));
+      return true;
+    }
+  }
+
+  // Look for a local dependency. Doing this first allows us to avoid having
+  // to disambiguate between the parts of the initial basic block before and
+  // after the original load instruction (when entered from a backedge).
+  do {
+    // Scan users of the clobbering memory access.
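+    // A memory use that reads the same location from the same clobbering
+    // access must observe the same value, so it can be reused directly.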
+    if (Optional<ReachingMemVal> R = scanUsers(
+            MSSA, AA, StartBlock, SmallVector<MemoryAccess *>{ClobberMA}, Loc,
+            IsInvariantLoad, L)) {
+      Values.push_back(*R);
+      return true;
+    }
+
+    if (ClobberMA->getBlock() != &StartBlock || isa<MemoryPhi>(ClobberMA))
+      break;
+
+    // Check if the clobber actually modifies the load location.
+    if (Optional<ReachingMemVal> R = accessModifiesLocation(
+            MSSA, AA, *ClobberMA, Loc, IsInvariantLoad, StartBlock)) {
+      Values.push_back(*R);
+      return true;
+    }
+
+    // It may happen that the clobbering memory access does not actually
+    // clobber our load location; transition to *its* clobbering memory
+    // access.
+    ClobberMA = cast<MemoryDef>(ClobberMA)->getDefiningAccess();
+  } while (ClobberMA->getBlock() == &StartBlock);
+
+  // Non-local speculations are not allowed under asan.
+  if (L->getParent()->getParent()->hasFnAttribute(
+          Attribute::SanitizeAddress) ||
+      L->getParent()->getParent()->hasFnAttribute(
+          Attribute::SanitizeHWAddress))
+    return false;
+
+  // Walk backwards through the CFG, collecting blocks along the way,
+  // terminating at blocks/instructions that definitely define our memory
+  // location (perhaps in an unknown way).
+  // Begin by collecting the predecessors of the initial basic block as
+  // starting points for the walk.
+  DependencyBlockSet Blocks;
+  SmallVector<BasicBlock *> InitialWorklist;
+  const DataLayout &DL = L->getModule()->getDataLayout();
+  if (!collectPredecessors(&StartBlock,
+                           PHITransAddr(L->getPointerOperand(), DL, AC),
+                           ClobberMA, Blocks, InitialWorklist))
+    return false;
+
+  // Do a depth-first search in the reverse CFG.
+  auto Worklist = InitialWorklist;
+  while (!Worklist.empty()) {
+    BasicBlock *BB = Worklist.pop_back_val();
+    auto It = Blocks.find(BB);
+    DependencyBlockInfo &Info = It->second;
+
+    // Phi-translation may have failed.
+    if (Info.Addr.getAddr() == nullptr)
+      continue;
+
+    // If the clobbering memory access is in the current block and it indeed
+    // clobbers our load location, record the dependency and stop the
+    // traversal.
+    if (Info.ClobberMA->getBlock() == BB && !isa<MemoryPhi>(Info.ClobberMA)) {
+      if (Optional<ReachingMemVal> R = accessModifiesLocation(
+              MSSA, AA, *Info.ClobberMA,
+              Loc.getWithNewPtr(Info.Addr.getAddr()), IsInvariantLoad, *BB)) {
+        Info.MemVal = R;
+        continue;
+      }
+      assert(!MSSA.isLiveOnEntryDef(Info.ClobberMA) &&
+             "liveOnEntry aliases everything");
+
+      // If, however, the clobbering memory access does not actually clobber
+      // our load location, transition to *its* clobbering memory access, but
+      // keep examining the same basic block.
+      Info.ClobberMA = cast<MemoryDef>(Info.ClobberMA)->getDefiningAccess();
+      Worklist.push_back(BB);
+      continue;
+    }
+
+    // At this point we know the current block is "transparent", i.e. the
+    // memory location is not modified when execution goes through this block.
+    // Continue to its predecessors, unless a predecessor has already been
+    // visited with a different address. We currently cannot represent such a
+    // dependency.
+    if (BB == &StartBlock && Info.Addr.getAddr() != L->getPointerOperand()) {
+      Info.ForceUnknown = true;
+      continue;
+    }
+    if (BB != &StartBlock &&
+        !collectPredecessors(BB, Info.Addr, Info.ClobberMA, Blocks, Worklist))
+      Info.ForceUnknown = true;
+  }
+
+  // Now we have collected all the blocks that write a (possibly unknown)
+  // value to our memory location, such that there exists a path from them to
+  // the load instruction along which the memory location is not modified. Do
+  // a second traversal, looking at memory loads.
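+  // The second traversal starts from the same initial worklist and marks
+  // blocks as Visited, so each block's clobber list is examined only once.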
+ Worklist = InitialWorklist; + for (BasicBlock *BB : Worklist) { + auto It = Blocks.find(BB); + DependencyBlockInfo &Info = It->second; + Info.Visited = true; + } + + SmallVector Clobbers; + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + auto It = Blocks.find(BB); + DependencyBlockInfo &Info = It->second; + + // If phi-translation failed, assume the memory location is modified in + // unknown way. + if (Info.Addr.getAddr() == nullptr) { + Values.push_back(ReachingMemVal::getUnknown(BB, nullptr)); + continue; + } + + Clobbers.clear(); + collectClobberList(MSSA, BB, Info, Blocks, Clobbers); + if (Optional R = scanUsers( + MSSA, AA, *BB, Clobbers, Loc.getWithNewPtr(Info.Addr.getAddr()), + IsInvariantLoad)) { + Values.push_back(*R); + continue; + } + + // If no reusable memory use was found, and the current block is not + // transparent, use the already established memory def. + if (Info.MemVal) { + Values.push_back(*Info.MemVal); + continue; + } + + if (Info.ForceUnknown) { + Values.push_back(ReachingMemVal::getUnknown(BB, Info.Addr.getAddr())); + continue; + } + + // If the current block is transparent, continue to its predecessors. + for (BasicBlock *Pred : predecessors(BB)) { + auto It = Blocks.find(Pred); + if (It == Blocks.end()) + continue; + DependencyBlockInfo &PredInfo = It->second; + if (PredInfo.Visited) + continue; + PredInfo.Visited = true; + Worklist.push_back(Pred); + } + } + + return true; +} + /// Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. bool GVNPass::processLoad(LoadInst *L) { - if (!MD) + if (!MSSAU) return false; // This code hasn't been audited for ordered or volatile memory access @@ -2035,17 +2517,15 @@ return true; } - // ... to a pointer that has been loaded from before... - MemDepResult Dep = MD->getDependency(L); - - // If it is defined in another block, try harder. - if (Dep.isNonLocal()) - return processNonLocalLoad(L); + SmallVector MemVals; + if (!findReachingValuesForLoad(L, *MSSAU->getMemorySSA(), *AA, MemVals)) + return false; // Too many dependencies. + assert(MemVals.size() && "Expected at least an unknown value"); + if (MemVals.size() > 1 || MemVals[0].Block != L->getParent()) + return processNonLocalLoad(L, MemVals); Value *Address = L->getPointerOperand(); - // Only handle the local case below - if (!Dep.isDef() && !Dep.isClobber() && !isa(Address)) { - // This might be a NonFuncLocal or an Unknown + if (MemVals[0].Kind == DepKind::Other && !isa(Address)) { LLVM_DEBUG( // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; L->printAsOperand(dbgs()); @@ -2054,7 +2534,7 @@ } AvailableValue AV; - if (AnalyzeLoadAvailability(L, Dep, Address, AV)) { + if (AnalyzeLoadAvailability(L, MemVals[0], Address, AV)) { Value *AvailableValue = AV.MaterializeAdjustedValue(L, L, *this); // Replace the load! @@ -2064,10 +2544,6 @@ MSSAU->removeMemoryAccess(L); ++NumGVNLoad; reportLoadElim(L, AvailableValue, ORE); - // Tell MDA to reexamine the reused pointer since we might have more - // information after forwarding it. 
- if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) - MD->invalidateCachedPointerInfo(AvailableValue); return true; } @@ -2127,25 +2603,7 @@ Vals = Vals->Next; } - if (AA->doesNotAccessMemory(Call)) - return true; - - if (!MD || !AA->onlyReadsMemory(Call)) - return false; - - MemDepResult local_dep = MD->getDependency(Call); - if (!local_dep.isNonLocal()) - return false; - - const MemoryDependenceResults::NonLocalDepInfo &deps = - MD->getNonLocalCallDependency(Call); - - // Check to see if the Call has no function local clobber. - for (const NonLocalDepEntry &D : deps) { - if (D.getResult().isNonFuncLocal()) - return true; - } - return false; + return AA->doesNotAccessMemory(Call); } /// Translate value number \p Num using phis, so that it has the values of @@ -2154,14 +2612,38 @@ const BasicBlock *PhiBlock, uint32_t Num, GVNPass &Gvn) { if (PHINode *PN = NumberingPhi[Num]) { + if (PN->getParent() != PhiBlock) + return Num; for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) + if (PN->getIncomingBlock(i) == Pred) if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) return TransVal; } return Num; } + if (BasicBlock *BB = NumberingBB[Num]) { + assert(MSSA && "NumberingBB is non-empty only when using MemorySSA"); + // Value numbers of basic blocks are used to represent memory state in + // load/store instructions and read-only function calls when said state is + // set by a MemoryPhi. + if (BB != PhiBlock) + return Num; + MemoryPhi *MPhi = MSSA->getMemoryAccess(BB); + for (unsigned I = 0, N = MPhi->getNumIncomingValues(); I != N; ++I) { + if (MPhi->getIncomingBlock(I) != Pred) + continue; + MemoryAccess *MA = MPhi->getIncomingValue(I); + if (auto *PredPhi = dyn_cast(MA)) + return lookupOrAdd(PredPhi->getBlock()); + if (MSSA->isLiveOnEntryDef(MA)) + return lookupOrAdd(&BB->getParent()->getEntryBlock()); + return lookupOrAdd(cast(MA)->getMemoryInst()); + } + llvm_unreachable( + "CFG/MemorySSA mismatch: predecessor not found among incoming blocks"); + } + // If there is any value related with Num is defined in a BB other than // PhiBlock, it cannot depend on a phi in PhiBlock without going through // a backedge. We can do an early exit in that case to save compile time. @@ -2196,7 +2678,7 @@ } if (uint32_t NewNum = expressionNumbering[Exp]) { - if (Exp.opcode == Instruction::Call && NewNum != Num) + if (MSSA == nullptr && Exp.opcode == Instruction::Call && NewNum != Num) return areCallValsEqual(Num, NewNum, Pred, PhiBlock, Gvn) ? NewNum : Num; return NewNum; } @@ -2350,9 +2832,6 @@ Changed |= NumReplacements > 0; NumGVNEqProp += NumReplacements; - // Cached information for anything that uses LHS will be invalid. - if (MD) - MD->invalidateCachedPointerInfo(LHS); } // Now try to deduce additional equalities from this one. For example, if @@ -2413,9 +2892,6 @@ Root.getStart()); Changed |= NumReplacements > 0; NumGVNEqProp += NumReplacements; - // Cached information for anything that uses NotCmp will be invalid. - if (MD) - MD->invalidateCachedPointerInfo(NotCmp); } } // Ensure that any instruction in scope that gets the "A < B" value number @@ -2458,8 +2934,6 @@ Changed = true; } if (Changed) { - if (MD && V->getType()->isPtrOrPtrVectorTy()) - MD->invalidateCachedPointerInfo(V); ++NumGVNSimpl; return true; } @@ -2568,8 +3042,6 @@ // Remove it! 
patchAndReplaceAllUsesWith(I, Repl); - if (MD && Repl->getType()->isPtrOrPtrVectorTy()) - MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; } @@ -2577,23 +3049,26 @@ /// runOnFunction - This is the main transformation entry point for a function. bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, - MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { + LoopInfo *LI, OptimizationRemarkEmitter *RunORE, + MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); TLI = &RunTLI; + AA = &RunAA; VN.setAliasAnalysis(&RunAA); - MD = RunMD; ImplicitControlFlowTracking ImplicitCFT; ICF = &ImplicitCFT; this->LI = LI; - VN.setMemDep(MD); + VN.setMemorySSA(MSSA); ORE = RunORE; InvalidBlockRPONumbers = true; MemorySSAUpdater Updater(MSSA); MSSAU = MSSA ? &Updater : nullptr; + if (MSSA) + MSSA->ensureOptimizedUses(); + bool Changed = false; bool ShouldContinue = true; @@ -2601,7 +3076,7 @@ // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. for (BasicBlock &BB : llvm::make_early_inc_range(F)) { - bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, MD); + bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, nullptr); if (removedBlock) ++NumGVNBlocks; @@ -2686,7 +3161,6 @@ LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n'); salvageKnowledge(I, AC); salvageDebugInfo(*I); - if (MD) MD->removeInstruction(I); if (MSSAU) MSSAU->removeMemoryAccess(I); LLVM_DEBUG(verifyRemoved(I)); @@ -2912,14 +3386,10 @@ addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->isPtrOrPtrVectorTy()) - MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); - if (MD) - MD->removeInstruction(CurInst); if (MSSAU) MSSAU->removeMemoryAccess(CurInst); LLVM_DEBUG(verifyRemoved(CurInst)); @@ -2967,11 +3437,8 @@ BasicBlock *BB = SplitCriticalEdge( Pred, Succ, CriticalEdgeSplittingOptions(DT, LI, MSSAU).unsetPreserveLoopSimplify()); - if (BB) { - if (MD) - MD->invalidateCachedPredecessors(); + if (BB) InvalidBlockRPONumbers = true; - } return BB; } @@ -2988,11 +3455,8 @@ CriticalEdgeSplittingOptions(DT, LI, MSSAU)) != nullptr; } while (!toSplit.empty()); - if (Changed) { - if (MD) - MD->invalidateCachedPredecessors(); + if (Changed) InvalidBlockRPONumbers = true; - } return Changed; } @@ -3110,11 +3574,8 @@ for (BasicBlock *P : predecessors(B)) { if (!DeadBlocks.count(P)) continue; - for (PHINode &Phi : B->phis()) { + for (PHINode &Phi : B->phis()) Phi.setIncomingValueForBlock(P, PoisonValue::get(Phi.getType())); - if (MD) - MD->invalidateCachedPointerInfo(&Phi); - } } } } @@ -3173,8 +3634,8 @@ public: static char ID; // Pass identification, replacement for typeid - explicit GVNLegacyPass(bool NoMemDepAnalysis = !GVNEnableMemDep) - : FunctionPass(ID), Impl(GVNOptions().setMemDep(!NoMemDepAnalysis)) { + explicit GVNLegacyPass(bool MemSSAAnalysis = GVNEnableMemorySSA) + : FunctionPass(ID), Impl(GVNOptions().setMemorySSA(MemSSAAnalysis)) { initializeGVNLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -3183,19 +3644,16 @@ return false; auto *LIWP = getAnalysisIfAvailable(); - - auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), 
getAnalysis().getDomTree(), getAnalysis().getTLI(F), getAnalysis().getAAResults(), - Impl.isMemDepEnabled() - ? &getAnalysis().getMemDep() - : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, &getAnalysis().getORE(), - MSSAWP ? &MSSAWP->getMSSA() : nullptr); + Impl.isMemorySSAEnabled() + ? &getAnalysis().getMSSA() + : nullptr); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -3203,8 +3661,8 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); - if (Impl.isMemDepEnabled()) - AU.addRequired(); + if (Impl.isMemorySSAEnabled()) + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -3222,7 +3680,7 @@ INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -3231,6 +3689,4 @@ INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false) // The public interface to this file... -FunctionPass *llvm::createGVNPass(bool NoMemDepAnalysis) { - return new GVNLegacyPass(NoMemDepAnalysis); -} +FunctionPass *llvm::createGVNPass() { return new GVNLegacyPass(); } Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -43,7 +43,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/PostDominators.h" @@ -259,8 +258,8 @@ class GVNHoist { public: GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, - MemoryDependenceResults *MD, MemorySSA *MSSA) - : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), + MemorySSA *MSSA) + : DT(DT), PDT(PDT), AA(AA), MSSA(MSSA), MSSAUpdater(std::make_unique(MSSA)) { MSSA->ensureOptimizedUses(); } @@ -280,7 +279,6 @@ DominatorTree *DT; PostDominatorTree *PDT; AliasAnalysis *AA; - MemoryDependenceResults *MD; MemorySSA *MSSA; std::unique_ptr MSSAUpdater; DenseMap DFSNumber; @@ -533,10 +531,9 @@ auto &DT = getAnalysis().getDomTree(); auto &PDT = getAnalysis().getPostDomTree(); auto &AA = getAnalysis().getAAResults(); - auto &MD = getAnalysis().getMemDep(); auto &MSSA = getAnalysis().getMSSA(); - GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); + GVNHoist G(&DT, &PDT, &AA, &MSSA); return G.run(F); } @@ -544,7 +541,6 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -556,7 +552,6 @@ NumFuncArgs = F.arg_size(); VN.setDomTree(DT); VN.setAliasAnalysis(AA); - VN.setMemDep(MD); bool Res = false; // Perform DFS Numbering of instructions. unsigned BBI = 0; @@ -1022,8 +1017,6 @@ Repl->andIRFlags(I); combineKnownMetadata(Repl, I); I->replaceAllUsesWith(Repl); - // Also invalidate the Alias Analysis cache. - MD->removeInstruction(I); I->eraseFromParent(); } } @@ -1143,7 +1136,6 @@ // Move the instruction at the end of HoistPt. 
     Instruction *Last = DestBB->getTerminator();
-    MD->removeInstruction(Repl);
     Repl->moveBefore(Last);
 
     DFSNumber[Repl] = DFSNumber[Last]++;
@@ -1240,9 +1232,8 @@
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
-  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
-  GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
+  GVNHoist G(&DT, &PDT, &AA, &MSSA);
   if (!G.run(F))
     return PreservedAnalyses::all();
@@ -1256,7 +1247,6 @@
 
 INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
                       "Early GVN Hoisting of Expressions", false, false)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
Index: llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
===================================================================
--- llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
+++ llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=gvn -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -S | FileCheck %s --check-prefixes=CHECK
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
 target triple = "x86_64-unknown-linux-gnu"
@@ -10,20 +10,19 @@
 define i8 @test(i1 %cmp) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[P:%.*]] = alloca i8
-; CHECK-NEXT: store i8 5, i8* [[P]]
+; CHECK-NEXT: [[P:%.*]] = alloca i8, align 1
+; CHECK-NEXT: store i8 5, i8* [[P]], align 1
 ; CHECK-NEXT: br label [[HEADER:%.*]]
 ; CHECK: header:
-; CHECK-NEXT: [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ]
-; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[I_INC:%.*]], [[ALIVE]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[ALIVE:%.*]] ]
 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[ALIVE]], label [[DEAD:%.*]]
 ; CHECK: dead:
 ; CHECK-NEXT: call void @foo(i8* [[P]])
-; CHECK-NEXT: [[I_1:%.*]] = add i8 [[I]], [[V]]
+; CHECK-NEXT: [[I_1:%.*]] = add i8 [[I]], 5
 ; CHECK-NEXT: br label [[ALIVE]]
 ; CHECK: alive:
 ; CHECK-NEXT: [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ]
-; CHECK-NEXT: store i8 -5, i8* [[P]]
+; CHECK-NEXT: store i8 -5, i8* [[P]], align 1
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[P]], i8 0, i32 1, i1 false)
 ; CHECK-NEXT: [[I_INC]] = add i8 [[I_2]], 1
 ; CHECK-NEXT: [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100
@@ -31,7 +30,6 @@
 ; CHECK: exit:
 ; CHECK-NEXT: ret i8 0
 ;
-
 entry:
   %p = alloca i8
   %addr = getelementptr inbounds i8, i8* %p, i64 0
@@ -68,7 +66,7 @@
 ; CHECK-NEXT: call void @foo(i8* [[P]])
 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B2:%.*]], label [[B1:%.*]]
 ; CHECK: b1:
-; CHECK-NEXT: [[RES2:%.*]] = load i8, i8* [[P]]
+; CHECK-NEXT: [[RES2:%.*]] = load i8, i8* [[P]], align 1
 ; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
 ; CHECK-NEXT: br label [[ALIVE:%.*]]
 ; CHECK: b2:
@@ -78,7 +76,6 @@
 ; CHECK-NEXT: [[RES_PHI:%.*]] = phi i8 [ [[RES3]], [[B1]] ], [ [[RES_DEAD]], [[B2]] ]
 ; CHECK-NEXT: ret i8 [[RES_PHI]]
 ;
-
 entry:
   %res1 = load i8, i8* %p
   call void @foo(i8 *%p)
@@ -106,7 +103,7 @@
 ; CHECK-NEXT: call void @foo(i8* [[P]])
 ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]]
 ; CHECK: b1:
-; CHECK-NEXT: [[RES2:%.*]] = load i8, i8* [[P]]
+; CHECK-NEXT: [[RES2:%.*]] = load i8, i8* [[P]], align 1
 ; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]]
 ; CHECK-NEXT: br label [[ALIVE:%.*]]
 ; CHECK: b2:
Index: llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
===================================================================
--- llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -tbaa -basic-aa -gvn -S < %s | FileCheck %s
+; RUN: opt -tbaa -basic-aa -gvn -S < %s | FileCheck %s --check-prefixes=CHECK
 
 target datalayout = "e-p:64:64:64"
 
@@ -30,14 +30,17 @@
 ; the other type could be unified with the first type, however for now, GVN
 ; should just be conservative.
 
+; However, with the MemorySSA changes this no longer happens and GVN optimises
+; it just like in the next function.
+
 ; CHECK: @watch_out_for_type_change
 ; CHECK: if.then:
 ; CHECK: %t = load i32, i32* %p
 ; CHECK: store i32 %t, i32* %q
 ; CHECK: ret void
 ; CHECK: if.else:
-; CHECK: %u = load i32, i32* %p
-; CHECK: store i32 %u, i32* %q
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
 
 define void @watch_out_for_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
 entry:
Index: llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
===================================================================
--- llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
+++ llvm/test/Analysis/ValueTracking/gep-negative-issue.ll
@@ -5,16 +5,16 @@
 %ArrayImpl = type { i64, i64 addrspace(100)*, [1 x i64], [1 x i64], [1 x i64], i64, i64, double addrspace(100)*, double addrspace(100)*, i8, i64 }
 %_array = type { i64, %ArrayImpl addrspace(100)*, i8 }
 
-define void @test(i64 %n_chpl) {
+define void @test(%_array* %p) {
 entry:
   ; First section is some code
-  %0 = getelementptr inbounds %_array, %_array* null, i32 0, i32 1
+  %0 = getelementptr inbounds %_array, %_array* %p, i32 0, i32 1
   %1 = load %ArrayImpl addrspace(100)*, %ArrayImpl addrspace(100)** %0
   %2 = getelementptr inbounds %ArrayImpl, %ArrayImpl addrspace(100)* %1, i32 0, i32 8
   %3 = load double addrspace(100)*, double addrspace(100)* addrspace(100)* %2
   %4 = getelementptr inbounds double, double addrspace(100)* %3, i64 -1
   ; Second section is that code repeated
-  %x0 = getelementptr inbounds %_array, %_array* null, i32 0, i32 1
+  %x0 = getelementptr inbounds %_array, %_array* %p, i32 0, i32 1
   %x1 = load %ArrayImpl addrspace(100)*, %ArrayImpl addrspace(100)** %x0
   %x2 = getelementptr inbounds %ArrayImpl, %ArrayImpl addrspace(100)* %x1, i32 0, i32 8
   %x3 = load double addrspace(100)*, double addrspace(100)* addrspace(100)* %x2
Index: llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -1037,9 +1037,8 @@
 ; GCN-O3-NEXT: Speculatively execute instructions
 ; GCN-O3-NEXT: Scalar Evolution Analysis
 ; GCN-O3-NEXT: Straight line strength reduction
-; GCN-O3-NEXT: Phi Values Analysis
 ; GCN-O3-NEXT: Function Alias Analysis Results
-; GCN-O3-NEXT: Memory Dependence Analysis
+; GCN-O3-NEXT: Memory SSA
 ; GCN-O3-NEXT: Optimization Remark Emitter
 ; GCN-O3-NEXT: Global Value Numbering
 ; GCN-O3-NEXT: Scalar Evolution Analysis
@@ -1076,10 +1075,9 @@
 ; GCN-O3-NEXT: Expand reduction intrinsics
 ; GCN-O3-NEXT: Natural Loop Information
 ; GCN-O3-NEXT: TLS Variable Hoist
-; GCN-O3-NEXT: Phi Values Analysis
 ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
 ; GCN-O3-NEXT: Function Alias Analysis Results
-; GCN-O3-NEXT: Memory Dependence Analysis
+; GCN-O3-NEXT: Memory SSA
 ; GCN-O3-NEXT: Lazy Branch Probability Analysis
 ; GCN-O3-NEXT: Lazy Block Frequency Analysis
 ; GCN-O3-NEXT: Optimization Remark Emitter
Index: llvm/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-defaults.ll
+++ llvm/test/Other/new-pm-defaults.ll
@@ -188,8 +188,6 @@
 ; CHECK-MATRIX: Running pass: VectorCombinePass
 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
-; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O1-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
Index: llvm/test/Other/new-pm-lto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-lto-defaults.ll
+++ llvm/test/Other/new-pm-lto-defaults.ll
@@ -108,8 +108,6 @@
 ; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O23SZ-NEXT: Running pass: LICMPass on Loop
 ; CHECK-O23SZ-NEXT: Running pass: GVNPass on foo
-; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis on foo
-; CHECK-O23SZ-NEXT: Running analysis: PhiValuesAnalysis on foo
 ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: DSEPass on foo
 ; CHECK-O23SZ-NEXT: Running analysis: PostDominatorTreeAnalysis on foo
Index: llvm/test/Other/new-pm-print-pipeline.ll
===================================================================
--- llvm/test/Other/new-pm-print-pipeline.ll
+++ llvm/test/Other/new-pm-print-pipeline.ll
@@ -34,8 +34,8 @@
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-unroll<>,loop-unroll,loop-unroll)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-10
 ; CHECK-10: function(loop-unroll<>,loop-unroll,loop-unroll)
 
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(gvn<>,gvn<pre;load-pre;split-backedge-load-pre;memdep>,gvn<no-pre;no-load-pre;no-split-backedge-load-pre;no-memdep>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-11
-; CHECK-11: function(gvn<>,gvn<pre;load-pre;split-backedge-load-pre;memdep>,gvn<no-pre;no-load-pre;no-split-backedge-load-pre;no-memdep>)
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(gvn<>,gvn<pre;load-pre;split-backedge-load-pre;memoryssa>,gvn<no-pre;no-load-pre;no-split-backedge-load-pre;no-memoryssa>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-11
+; CHECK-11: function(gvn<>,gvn<pre;load-pre;split-backedge-load-pre;memoryssa>,gvn<no-pre;no-load-pre;no-split-backedge-load-pre;no-memoryssa>)
 
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(early-cse<>,early-cse)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-12
 ; CHECK-12: function(early-cse<>,early-cse)
Index: llvm/test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -148,20 +148,12 @@
 ; CHECK-O-NEXT: Running pass: SROAPass on foo
 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Os-NEXT: Running pass: GVNPass
-; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Oz-NEXT: Running pass: GVNPass
-; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O2-NEXT: Running pass: GVNPass
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O3-NEXT: Running pass: GVNPass
-; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O1-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -121,20 +121,12 @@
 ; CHECK-O-NEXT: Running pass: SROAPass on foo
 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Os-NEXT: Running pass: GVNPass
-; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Oz-NEXT: Running pass: GVNPass
-; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O2-NEXT: Running pass: GVNPass
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O3-NEXT: Running pass: GVNPass
-; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O1-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -130,20 +130,12 @@
 ; CHECK-O-NEXT: Running pass: SROAPass on foo
 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Os-NEXT: Running pass: GVNPass
-; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Oz-NEXT: Running pass: GVNPass
-; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O2-NEXT: Running pass: GVNPass
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O3-NEXT: Running pass: GVNPass
-; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O1-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -159,20 +159,12 @@
 ; CHECK-O-NEXT: Running pass: SROAPass on foo
 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Os-NEXT: Running pass: GVNPass
-; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Oz-NEXT: Running pass: GVNPass
-; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O2-NEXT: Running pass: GVNPass
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O3-NEXT: Running pass: GVNPass
-; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O1-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
@@ -124,20 +124,12 @@
 ; CHECK-O-NEXT: Running pass: SROAPass on foo
 ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Os-NEXT: Running pass: GVNPass
-; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Os-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-Oz-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-Oz-NEXT: Running pass: GVNPass
-; CHECK-Oz-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-Oz-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O2-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O2-NEXT: Running pass: GVNPass
-; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O2-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O3-NEXT: Running pass: MergedLoadStoreMotionPass
 ; CHECK-O3-NEXT: Running pass: GVNPass
-; CHECK-O3-NEXT: Running analysis: MemoryDependenceAnalysis
-; CHECK-O3-NEXT: Running analysis: PhiValuesAnalysis
 ; CHECK-O1-NEXT: Running pass: MemCpyOptPass
 ; CHECK-O-NEXT: Running pass: SCCPPass
 ; CHECK-O-NEXT: Running pass: BDCEPass
Index: llvm/test/Transforms/GVN/PRE/rle.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/rle.ll
+++ llvm/test/Transforms/GVN/PRE/rle.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basic-aa -gvn -enable-split-backedge-in-load-pre -S -dce | FileCheck %s --check-prefixes=CHECK,LE
 ; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basic-aa -gvn -enable-split-backedge-in-load-pre -S -dce | FileCheck %s --check-prefixes=CHECK,BE
@@ -1014,11 +1013,11 @@
 ; CHECK-NEXT: [[XX:%.*]] = bitcast i8* [[P:%.*]] to i32*
 ; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[XX]], align 4
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127
-; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
-; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8
+; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16
 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
-; LE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8
-; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; LE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16
+; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8
 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]]
 ; CHECK: if:
@@ -1026,7 +1025,8 @@
 ; CHECK: else:
 ; CHECK-NEXT: br label [[JOIN]]
 ; CHECK: join:
-; CHECK-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ]
+; LE-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ [[TMP3]], [[ELSE]] ]
+; BE-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP1]], [[IF]] ], [ [[TMP3]], [[ELSE]] ]
 ; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32
 ; CHECK-NEXT: ret i32 [[CONV6]]
 ; CHECK: if.end:
Index: llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll
===================================================================
--- llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll
+++ llvm/test/Transforms/GVN/condprop-memdep-invalidation.ll
@@ -35,23 +35,18 @@
 ; CHECK-NEXT: call void @use(i16 [[L_3]])
 ; CHECK-NEXT: br label [[LOOP_1_LATCH]]
 ; CHECK: loop.1.latch:
-; CHECK-NEXT: [[L_42:%.*]] = phi i16 [ [[L_3]], [[ELSE_2]] ], [ [[L_2]], [[THEN_2]] ]
 ; CHECK-NEXT: [[IV_1_NEXT]] = add i16 [[IV_1]], 1
 ; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i16 [[IV_1_NEXT]], 2
 ; CHECK-NEXT: br i1 [[CMP_3]], label [[LOOP_1_HEADER]], label [[LOOP_2:%.*]]
 ; CHECK: loop.2:
-; CHECK-NEXT: [[L_4:%.*]] = phi i16 [ [[L_42]], [[LOOP_1_LATCH]] ], [ [[L_4_PRE:%.*]], [[LOOP_2_LOOP_2_CRIT_EDGE:%.*]] ]
-; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ 0, [[LOOP_1_LATCH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2_LOOP_2_CRIT_EDGE]] ]
-; CHECK-NEXT: [[SUM:%.*]] = phi i16 [ 0, [[LOOP_1_LATCH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_2_LOOP_2_CRIT_EDGE]] ]
+; CHECK-NEXT: [[IV_2:%.*]] = phi i16 [ 0, [[LOOP_1_LATCH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_2]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i16 [ 0, [[LOOP_1_LATCH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_2]] ]
 ; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr [4 x i16], ptr [[UB_16]], i16 1, i16 [[IV_2]]
+; CHECK-NEXT: [[L_4:%.*]] = load i16, ptr [[GEP_5]], align 2
 ; CHECK-NEXT: [[SUM_NEXT]] = add i16 [[SUM]], [[L_4]]
 ; CHECK-NEXT: [[IV_2_NEXT]] = add i16 [[IV_2]], 1
 ; CHECK-NEXT: [[CMP_4:%.*]] = icmp slt i16 [[IV_2_NEXT]], 2
-; CHECK-NEXT: br i1 [[CMP_4]], label [[LOOP_2_LOOP_2_CRIT_EDGE]], label [[EXIT:%.*]]
-; CHECK: loop.2.loop.2_crit_edge:
-; CHECK-NEXT: [[GEP_5_PHI_TRANS_INSERT:%.*]] = getelementptr [4 x i16], ptr [[UB_16]], i16 1, i16 [[IV_2_NEXT]]
-; CHECK-NEXT: [[L_4_PRE]] = load i16, ptr [[GEP_5_PHI_TRANS_INSERT]], align 2
-; CHECK-NEXT: br label [[LOOP_2]]
+; CHECK-NEXT: br i1 [[CMP_4]], label [[LOOP_2]], label [[EXIT:%.*]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret i16 [[SUM_NEXT]]
 ;
Index: llvm/test/Transforms/GVN/no-mem-dep-info.ll
===================================================================
--- llvm/test/Transforms/GVN/no-mem-dep-info.ll
+++ llvm/test/Transforms/GVN/no-mem-dep-info.ll
@@ -1,5 +1,5 @@
-; RUN: opt %s -gvn -S -enable-gvn-memdep=false | FileCheck %s
-; RUN: opt %s -gvn -S -enable-gvn-memdep=true | FileCheck %s
+; RUN: opt %s -gvn -S -enable-gvn-memoryssa=false | FileCheck %s
+; RUN: opt %s -gvn -S -enable-gvn-memoryssa=true | FileCheck %s
 
 ; Check that llvm.x86.avx2.gather.d.ps.256 intrinsic is not eliminated by GVN
 ; with and without memory dependence info.
Index: llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1850,7 +1850,7 @@
 ; O3DEFAULT-NEXT: [[TMP47:%.*]] = add nsw <4 x i32> [[TMP46]], [[SHUFFLE]]
 ; O3DEFAULT-NEXT: [[TMP48:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>*
 ; O3DEFAULT-NEXT: store <4 x i32> [[TMP47]], <4 x i32>* [[TMP48]], align 4
-; O3DEFAULT-NEXT: [[TMP49:%.*]] = load i32, i32* [[A]], align 4
+; O3DEFAULT-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
 ; O3DEFAULT-NEXT: ret i32 [[TMP49]]
 ;
 ; Os-LABEL: @disabled(