diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -38,9 +38,9 @@ class MemCpyOptPass : public PassInfoMixin { MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; - std::function LookupAliasAnalysis; - std::function LookupAssumptionCache; - std::function LookupDomTree; + AliasAnalysis *AA = nullptr; + AssumptionCache *AC = nullptr; + DominatorTree *DT = nullptr; public: MemCpyOptPass() = default; @@ -49,10 +49,8 @@ // Glue for the old PM. bool runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, - std::function LookupAliasAnalysis_, - std::function LookupAssumptionCache_, - std::function LookupDomTree_); + TargetLibraryInfo *TLI_, AliasAnalysis *AA_, + AssumptionCache *AC_, DominatorTree *DT_); private: // Helper functions diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -271,10 +271,12 @@ AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addPreserved(); AU.addRequired(); - AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); AU.addPreserved(); } }; @@ -522,7 +524,6 @@ auto *T = LI->getType(); if (T->isAggregateType()) { - AliasAnalysis &AA = LookupAliasAnalysis(); MemoryLocation LoadLoc = MemoryLocation::get(LI); // We use alias analysis to check if an instruction may store to @@ -531,7 +532,7 @@ // of at the store position. Instruction *P = SI; for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) { - if (isModSet(AA.getModRefInfo(&I, LoadLoc))) { + if (isModSet(AA->getModRefInfo(&I, LoadLoc))) { P = &I; break; } @@ -542,7 +543,7 @@ // position if nothing alias the store memory after this and the store // destination is not in the range. if (P && P != SI) { - if (!moveUp(AA, SI, P, LI)) + if (!moveUp(*AA, SI, P, LI)) P = nullptr; } @@ -553,7 +554,7 @@ // memmove must be used to preserve semantic. If not, memcpy can // be used. bool UseMemMove = false; - if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc)) + if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc)) UseMemMove = true; uint64_t Size = DL.getTypeStoreSize(T); @@ -597,11 +598,10 @@ // the call and the store. Value *CpyDest = SI->getPointerOperand()->stripPointerCasts(); bool CpyDestIsLocal = isa(CpyDest); - AliasAnalysis &AA = LookupAliasAnalysis(); MemoryLocation StoreLoc = MemoryLocation::get(SI); for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator(); I != E; --I) { - if (isModOrRefSet(AA.getModRefInfo(&*I, StoreLoc))) { + if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) { C = nullptr; break; } @@ -811,20 +811,18 @@ // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. - DominatorTree &DT = LookupDomTree(); if (Instruction *cpyDestInst = dyn_cast(cpyDest)) - if (!DT.dominates(cpyDestInst, C)) + if (!DT->dominates(cpyDestInst, C)) return false; // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. - AliasAnalysis &AA = LookupAliasAnalysis(); - ModRefInfo MR = AA.getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); + ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize)); // If necessary, perform additional analysis. if (isModOrRefSet(MR)) - MR = AA.callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), &DT); + MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT); if (isModOrRefSet(MR)) return false; @@ -908,8 +906,6 @@ if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) return false; - AliasAnalysis &AA = LookupAliasAnalysis(); - // Verify that the copied-from memory doesn't change in between the two // transfers. For example, in: // memcpy(a <- b) @@ -932,8 +928,8 @@ // source and dest might overlap. We still want to eliminate the intermediate // value, but we have to generate a memmove instead of memcpy. bool UseMemMove = false; - if (!AA.isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(MDep))) + if (!AA->isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(MDep))) UseMemMove = true; // If all checks passed, then we can transform M. @@ -1057,11 +1053,9 @@ /// The \p MemCpy must have a Constant length. bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet) { - AliasAnalysis &AA = LookupAliasAnalysis(); - // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and // memcpying from the same address. Otherwise it is hard to reason about. - if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) + if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource())) return false; // A known memset size is required. @@ -1189,14 +1183,12 @@ /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed /// not to alias. bool MemCpyOptPass::processMemMove(MemMoveInst *M) { - AliasAnalysis &AA = LookupAliasAnalysis(); - if (!TLI->has(LibFunc_memmove)) return false; // See if the pointers alias. - if (!AA.isNoAlias(MemoryLocation::getForDest(M), - MemoryLocation::getForSource(M))) + if (!AA->isNoAlias(MemoryLocation::getForDest(M), + MemoryLocation::getForSource(M))) return false; LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M @@ -1250,12 +1242,10 @@ // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. - AssumptionCache &AC = LookupAssumptionCache(); - DominatorTree &DT = LookupDomTree(); MaybeAlign MemDepAlign = MDep->getSourceAlign(); if ((!MemDepAlign || *MemDepAlign < *ByValAlign) && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, &AC, - &DT) < *ByValAlign) + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC, + DT) < *ByValAlign) return false; // The address space of the memcpy source must match the byval argument @@ -1301,15 +1291,13 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { bool MadeChange = false; - DominatorTree &DT = LookupDomTree(); - // Walk all instruction in the function. for (BasicBlock &BB : F) { // Skip unreachable blocks. For example processStore assumes that an // instruction in a BB can't be dominated by a later instruction in the // same BB (which is a scenario that can happen for an unreachable BB that // has itself as a predecessor). - if (!DT.isReachableFromEntry(&BB)) + if (!DT->isReachableFromEntry(&BB)) continue; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { @@ -1347,19 +1335,11 @@ PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) { auto &MD = AM.getResult(F); auto &TLI = AM.getResult(F); + auto *AA = &AM.getResult(F); + auto *AC = &AM.getResult(F); + auto *DT = &AM.getResult(F); - auto LookupAliasAnalysis = [&]() -> AliasAnalysis & { - return AM.getResult(F); - }; - auto LookupAssumptionCache = [&]() -> AssumptionCache & { - return AM.getResult(F); - }; - auto LookupDomTree = [&]() -> DominatorTree & { - return AM.getResult(F); - }; - - bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis, - LookupAssumptionCache, LookupDomTree); + bool MadeChange = runImpl(F, &MD, &TLI, AA, AC, DT); if (!MadeChange) return PreservedAnalyses::all(); @@ -1370,18 +1350,15 @@ return PA; } -bool MemCpyOptPass::runImpl( - Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, - std::function LookupAliasAnalysis_, - std::function LookupAssumptionCache_, - std::function LookupDomTree_) { +bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_, + TargetLibraryInfo *TLI_, AliasAnalysis *AA_, + AssumptionCache *AC_, DominatorTree *DT_) { bool MadeChange = false; MD = MD_; TLI = TLI_; - LookupAliasAnalysis = std::move(LookupAliasAnalysis_); - LookupAssumptionCache = std::move(LookupAssumptionCache_); - LookupDomTree = std::move(LookupDomTree_); - + AA = AA_; + AC = AC_; + DT = DT_; // If we don't have at least memset and memcpy, there is little point of doing // anything here. These are required by a freestanding implementation, so if // even they are disabled, there is no point in trying hard. @@ -1405,17 +1382,9 @@ auto *MD = &getAnalysis().getMemDep(); auto *TLI = &getAnalysis().getTLI(F); + auto *AA = &getAnalysis().getAAResults(); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DT = &getAnalysis().getDomTree(); - auto LookupAliasAnalysis = [this]() -> AliasAnalysis & { - return getAnalysis().getAAResults(); - }; - auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & { - return getAnalysis().getAssumptionCache(F); - }; - auto LookupDomTree = [this]() -> DominatorTree & { - return getAnalysis().getDomTree(); - }; - - return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache, - LookupDomTree); + return Impl.runImpl(F, MD, TLI, AA, AC, DT); } diff --git a/llvm/test/Analysis/BasicAA/phi-values-usage.ll b/llvm/test/Analysis/BasicAA/phi-values-usage.ll --- a/llvm/test/Analysis/BasicAA/phi-values-usage.ll +++ b/llvm/test/Analysis/BasicAA/phi-values-usage.ll @@ -1,4 +1,4 @@ -; RUN: opt -debug-pass=Executions -phi-values -memcpyopt -instcombine -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-MEMCPY +; RUN: opt -debug-pass=Executions -phi-values -memcpyopt -instcombine -disable-output < %s 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-MEMCPY ; RUN: opt -debug-pass=Executions -memdep -instcombine -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK ; Check that phi values is not run when it's not already available, and that @@ -9,8 +9,8 @@ ; CHECK: Executing Pass 'Memory Dependence Analysis' ; CHECK-MEMCPY: Executing Pass 'MemCpy Optimization' ; CHECK-MEMCPY-DAG: Freeing Pass 'MemCpy Optimization' -; CHECK-DAG: Freeing Pass 'Phi Values Analysis' ; CHECK-DAG: Freeing Pass 'Memory Dependence Analysis' +; CHECK-DAG: Freeing Pass 'Phi Values Analysis' ; CHECK-MEMCPY-NOT: Freeing Pass 'Basic Alias Analysis (stateless AA impl)' ; CHECK-NOT: Executing Pass 'Phi Values Analysis' ; CHECK-NOT: Executing Pass 'Basic Alias Analysis (stateless AA impl)' diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -164,8 +164,8 @@ ; GCN-O1-NEXT: Delete dead loops ; GCN-O1-NEXT: Unroll loops ; GCN-O1-NEXT: SROA -; GCN-O1-NEXT: Phi Values Analysis ; GCN-O1-NEXT: Function Alias Analysis Results +; GCN-O1-NEXT: Phi Values Analysis ; GCN-O1-NEXT: Memory Dependence Analysis ; GCN-O1-NEXT: MemCpy Optimization ; GCN-O1-NEXT: Sparse Conditional Constant Propagation @@ -493,9 +493,9 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Optimization Remark Emitter ; GCN-O2-NEXT: Global Value Numbering -; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results +; GCN-O2-NEXT: Phi Values Analysis ; GCN-O2-NEXT: Memory Dependence Analysis ; GCN-O2-NEXT: MemCpy Optimization ; GCN-O2-NEXT: Sparse Conditional Constant Propagation @@ -853,9 +853,9 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Global Value Numbering -; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results +; GCN-O3-NEXT: Phi Values Analysis ; GCN-O3-NEXT: Memory Dependence Analysis ; GCN-O3-NEXT: MemCpy Optimization ; GCN-O3-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -140,9 +140,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -145,9 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -145,9 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -126,9 +126,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Global Value Numbering -; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Phi Values Analysis ; CHECK-NEXT: Memory Dependence Analysis ; CHECK-NEXT: MemCpy Optimization ; CHECK-NEXT: Sparse Conditional Constant Propagation